1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 case MAT_USE_SINGLE_PRECISION_SOLVES: 214 if (a->bs==4) { 215 PetscTruth sse_enabled_local,sse_enabled_global; 216 int ierr; 217 218 sse_enabled_local = PETSC_FALSE; 219 sse_enabled_global = PETSC_FALSE; 220 221 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 222 #if defined(PETSC_HAVE_SSE) 223 if (sse_enabled_local) { 224 a->single_precision_solves = PETSC_TRUE; 225 A->ops->solve = MatSolve_SeqBAIJ_Update; 226 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 227 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 228 break; 229 } else { 230 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 231 } 232 #else 233 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 234 #endif 235 } 236 break; 237 default: 238 SETERRQ(PETSC_ERR_SUP,"unknown option"); 239 } 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatGetRow_SeqBAIJ" 245 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 246 { 247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 248 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 249 MatScalar *aa,*aa_i; 250 PetscScalar *v_i; 251 252 PetscFunctionBegin; 253 bs = a->bs; 254 ai = a->i; 255 aj = a->j; 256 aa = a->a; 257 bs2 = a->bs2; 258 259 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 260 261 bn = row/bs; /* Block number */ 262 bp = row % bs; /* Block Position */ 263 M = ai[bn+1] - ai[bn]; 264 *nz = bs*M; 265 266 if (v) { 267 *v = 0; 268 if (*nz) { 269 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 270 for (i=0; i<M; i++) { /* for each block in the block row */ 271 v_i = *v + i*bs; 272 aa_i = aa + bs2*(ai[bn] + i); 273 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 274 } 275 } 276 } 277 278 if (idx) { 279 *idx = 0; 280 if (*nz) { 281 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 282 for (i=0; i<M; i++) { /* for each block in the block row */ 283 idx_i = *idx + i*bs; 284 itmp = bs*aj[ai[bn] + i]; 285 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 286 } 287 } 288 } 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 294 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 295 { 296 int ierr; 297 298 PetscFunctionBegin; 299 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 300 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 301 PetscFunctionReturn(0); 302 } 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatTranspose_SeqBAIJ" 306 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 307 { 308 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 309 Mat C; 310 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 311 int *rows,*cols,bs2=a->bs2; 312 PetscScalar *array; 313 314 PetscFunctionBegin; 315 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 316 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 317 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 318 319 #if defined(PETSC_USE_MAT_SINGLE) 320 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 321 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 322 #else 323 array = a->a; 324 #endif 325 326 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 327 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 328 ierr = PetscFree(col);CHKERRQ(ierr); 329 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 330 cols = rows + bs; 331 for (i=0; i<mbs; i++) { 332 cols[0] = i*bs; 333 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 334 len = ai[i+1] - ai[i]; 335 for (j=0; j<len; j++) { 336 rows[0] = (*aj++)*bs; 337 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 338 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 339 array += bs2; 340 } 341 } 342 ierr = PetscFree(rows);CHKERRQ(ierr); 343 #if defined(PETSC_USE_MAT_SINGLE) 344 ierr = PetscFree(array); 345 #endif 346 347 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349 350 if (B) { 351 *B = C; 352 } else { 353 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 360 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 361 { 362 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 363 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 364 PetscScalar *aa; 365 FILE *file; 366 367 PetscFunctionBegin; 368 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 369 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 370 col_lens[0] = MAT_FILE_COOKIE; 371 372 col_lens[1] = A->m; 373 col_lens[2] = A->n; 374 col_lens[3] = a->nz*bs2; 375 376 /* store lengths of each row and write (including header) to file */ 377 count = 0; 378 for (i=0; i<a->mbs; i++) { 379 for (j=0; j<bs; j++) { 380 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 381 } 382 } 383 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 384 ierr = PetscFree(col_lens);CHKERRQ(ierr); 385 386 /* store column indices (zero start index) */ 387 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 388 count = 0; 389 for (i=0; i<a->mbs; i++) { 390 for (j=0; j<bs; j++) { 391 for (k=a->i[i]; k<a->i[i+1]; k++) { 392 for (l=0; l<bs; l++) { 393 jj[count++] = bs*a->j[k] + l; 394 } 395 } 396 } 397 } 398 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 399 ierr = PetscFree(jj);CHKERRQ(ierr); 400 401 /* store nonzero values */ 402 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 403 count = 0; 404 for (i=0; i<a->mbs; i++) { 405 for (j=0; j<bs; j++) { 406 for (k=a->i[i]; k<a->i[i+1]; k++) { 407 for (l=0; l<bs; l++) { 408 aa[count++] = a->a[bs2*k + l*bs + j]; 409 } 410 } 411 } 412 } 413 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 414 ierr = PetscFree(aa);CHKERRQ(ierr); 415 416 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 417 if (file) { 418 fprintf(file,"-matload_block_size %d\n",a->bs); 419 } 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 425 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 428 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 429 PetscViewerFormat format; 430 431 PetscFunctionBegin; 432 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 433 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 434 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 435 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 436 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 437 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 438 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 439 for (i=0; i<a->mbs; i++) { 440 for (j=0; j<bs; j++) { 441 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 442 for (k=a->i[i]; k<a->i[i+1]; k++) { 443 for (l=0; l<bs; l++) { 444 #if defined(PETSC_USE_COMPLEX) 445 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 446 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 447 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 448 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 449 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 450 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 451 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 452 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } 454 #else 455 if (a->a[bs2*k + l*bs + j] != 0.0) { 456 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 457 } 458 #endif 459 } 460 } 461 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 462 } 463 } 464 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 465 } else { 466 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 467 for (i=0; i<a->mbs; i++) { 468 for (j=0; j<bs; j++) { 469 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 470 for (k=a->i[i]; k<a->i[i+1]; k++) { 471 for (l=0; l<bs; l++) { 472 #if defined(PETSC_USE_COMPLEX) 473 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 474 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 475 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 476 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 477 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 478 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 479 } else { 480 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } 482 #else 483 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 484 #endif 485 } 486 } 487 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 491 } 492 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 498 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 499 { 500 Mat A = (Mat) Aa; 501 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 502 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 503 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 504 MatScalar *aa; 505 PetscViewer viewer; 506 507 PetscFunctionBegin; 508 509 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 510 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 511 512 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 513 514 /* loop over matrix elements drawing boxes */ 515 color = PETSC_DRAW_BLUE; 516 for (i=0,row=0; i<mbs; i++,row+=bs) { 517 for (j=a->i[i]; j<a->i[i+1]; j++) { 518 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 519 x_l = a->j[j]*bs; x_r = x_l + 1.0; 520 aa = a->a + j*bs2; 521 for (k=0; k<bs; k++) { 522 for (l=0; l<bs; l++) { 523 if (PetscRealPart(*aa++) >= 0.) continue; 524 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 525 } 526 } 527 } 528 } 529 color = PETSC_DRAW_CYAN; 530 for (i=0,row=0; i<mbs; i++,row+=bs) { 531 for (j=a->i[i]; j<a->i[i+1]; j++) { 532 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 533 x_l = a->j[j]*bs; x_r = x_l + 1.0; 534 aa = a->a + j*bs2; 535 for (k=0; k<bs; k++) { 536 for (l=0; l<bs; l++) { 537 if (PetscRealPart(*aa++) != 0.) continue; 538 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 544 color = PETSC_DRAW_RED; 545 for (i=0,row=0; i<mbs; i++,row+=bs) { 546 for (j=a->i[i]; j<a->i[i+1]; j++) { 547 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 548 x_l = a->j[j]*bs; x_r = x_l + 1.0; 549 aa = a->a + j*bs2; 550 for (k=0; k<bs; k++) { 551 for (l=0; l<bs; l++) { 552 if (PetscRealPart(*aa++) <= 0.) continue; 553 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 554 } 555 } 556 } 557 } 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 563 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 564 { 565 int ierr; 566 PetscReal xl,yl,xr,yr,w,h; 567 PetscDraw draw; 568 PetscTruth isnull; 569 570 PetscFunctionBegin; 571 572 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 573 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 574 575 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 576 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 577 xr += w; yr += h; xl = -w; yl = -h; 578 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 579 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 580 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatView_SeqBAIJ" 586 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 587 { 588 int ierr; 589 PetscTruth issocket,isascii,isbinary,isdraw; 590 591 PetscFunctionBegin; 592 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 596 if (issocket) { 597 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 598 } else if (isascii){ 599 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 600 } else if (isbinary) { 601 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 602 } else if (isdraw) { 603 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 604 } else { 605 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 606 } 607 PetscFunctionReturn(0); 608 } 609 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatGetValues_SeqBAIJ" 613 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 614 { 615 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 616 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 617 int *ai = a->i,*ailen = a->ilen; 618 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 619 MatScalar *ap,*aa = a->a,zero = 0.0; 620 621 PetscFunctionBegin; 622 for (k=0; k<m; k++) { /* loop over rows */ 623 row = im[k]; brow = row/bs; 624 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 625 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 626 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 627 nrow = ailen[brow]; 628 for (l=0; l<n; l++) { /* loop over columns */ 629 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 630 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 631 col = in[l] ; 632 bcol = col/bs; 633 cidx = col%bs; 634 ridx = row%bs; 635 high = nrow; 636 low = 0; /* assume unsorted */ 637 while (high-low > 5) { 638 t = (low+high)/2; 639 if (rp[t] > bcol) high = t; 640 else low = t; 641 } 642 for (i=low; i<high; i++) { 643 if (rp[i] > bcol) break; 644 if (rp[i] == bcol) { 645 *v++ = ap[bs2*i+bs*cidx+ridx]; 646 goto finished; 647 } 648 } 649 *v++ = zero; 650 finished:; 651 } 652 } 653 PetscFunctionReturn(0); 654 } 655 656 #if defined(PETSC_USE_MAT_SINGLE) 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 659 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 660 { 661 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 662 int ierr,i,N = m*n*b->bs2; 663 MatScalar *vsingle; 664 665 PetscFunctionBegin; 666 if (N > b->setvalueslen) { 667 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 668 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 669 b->setvalueslen = N; 670 } 671 vsingle = b->setvaluescopy; 672 for (i=0; i<N; i++) { 673 vsingle[i] = v[i]; 674 } 675 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 #endif 679 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 683 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 684 { 685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 686 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 687 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 688 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 689 PetscTruth roworiented=a->roworiented; 690 MatScalar *value = v,*ap,*aa = a->a,*bap; 691 692 PetscFunctionBegin; 693 if (roworiented) { 694 stepval = (n-1)*bs; 695 } else { 696 stepval = (m-1)*bs; 697 } 698 for (k=0; k<m; k++) { /* loop over added rows */ 699 row = im[k]; 700 if (row < 0) continue; 701 #if defined(PETSC_USE_BOPT_g) 702 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 703 #endif 704 rp = aj + ai[row]; 705 ap = aa + bs2*ai[row]; 706 rmax = imax[row]; 707 nrow = ailen[row]; 708 low = 0; 709 for (l=0; l<n; l++) { /* loop over added columns */ 710 if (in[l] < 0) continue; 711 #if defined(PETSC_USE_BOPT_g) 712 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 713 #endif 714 col = in[l]; 715 if (roworiented) { 716 value = v + k*(stepval+bs)*bs + l*bs; 717 } else { 718 value = v + l*(stepval+bs)*bs + k*bs; 719 } 720 if (!sorted) low = 0; high = nrow; 721 while (high-low > 7) { 722 t = (low+high)/2; 723 if (rp[t] > col) high = t; 724 else low = t; 725 } 726 for (i=low; i<high; i++) { 727 if (rp[i] > col) break; 728 if (rp[i] == col) { 729 bap = ap + bs2*i; 730 if (roworiented) { 731 if (is == ADD_VALUES) { 732 for (ii=0; ii<bs; ii++,value+=stepval) { 733 for (jj=ii; jj<bs2; jj+=bs) { 734 bap[jj] += *value++; 735 } 736 } 737 } else { 738 for (ii=0; ii<bs; ii++,value+=stepval) { 739 for (jj=ii; jj<bs2; jj+=bs) { 740 bap[jj] = *value++; 741 } 742 } 743 } 744 } else { 745 if (is == ADD_VALUES) { 746 for (ii=0; ii<bs; ii++,value+=stepval) { 747 for (jj=0; jj<bs; jj++) { 748 *bap++ += *value++; 749 } 750 } 751 } else { 752 for (ii=0; ii<bs; ii++,value+=stepval) { 753 for (jj=0; jj<bs; jj++) { 754 *bap++ = *value++; 755 } 756 } 757 } 758 } 759 goto noinsert2; 760 } 761 } 762 if (nonew == 1) goto noinsert2; 763 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 764 if (nrow >= rmax) { 765 /* there is no extra room in row, therefore enlarge */ 766 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 767 MatScalar *new_a; 768 769 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 770 771 /* malloc new storage space */ 772 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 773 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 774 new_j = (int*)(new_a + bs2*new_nz); 775 new_i = new_j + new_nz; 776 777 /* copy over old data into new slots */ 778 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 779 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 780 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 781 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 782 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 783 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 784 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 785 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 786 /* free up old matrix storage */ 787 ierr = PetscFree(a->a);CHKERRQ(ierr); 788 if (!a->singlemalloc) { 789 ierr = PetscFree(a->i);CHKERRQ(ierr); 790 ierr = PetscFree(a->j);CHKERRQ(ierr); 791 } 792 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 793 a->singlemalloc = PETSC_TRUE; 794 795 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 796 rmax = imax[row] = imax[row] + CHUNKSIZE; 797 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 798 a->maxnz += bs2*CHUNKSIZE; 799 a->reallocs++; 800 a->nz++; 801 } 802 N = nrow++ - 1; 803 /* shift up all the later entries in this row */ 804 for (ii=N; ii>=i; ii--) { 805 rp[ii+1] = rp[ii]; 806 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 807 } 808 if (N >= i) { 809 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 810 } 811 rp[i] = col; 812 bap = ap + bs2*i; 813 if (roworiented) { 814 for (ii=0; ii<bs; ii++,value+=stepval) { 815 for (jj=ii; jj<bs2; jj+=bs) { 816 bap[jj] = *value++; 817 } 818 } 819 } else { 820 for (ii=0; ii<bs; ii++,value+=stepval) { 821 for (jj=0; jj<bs; jj++) { 822 *bap++ = *value++; 823 } 824 } 825 } 826 noinsert2:; 827 low = i; 828 } 829 ailen[row] = nrow; 830 } 831 PetscFunctionReturn(0); 832 } 833 834 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 835 #undef __FUNCT__ 836 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 837 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 840 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 841 int m = A->m,*ip,N,*ailen = a->ilen; 842 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 843 MatScalar *aa = a->a,*ap; 844 #if defined(PETSC_HAVE_DSCPACK) 845 PetscTruth flag; 846 #endif 847 848 PetscFunctionBegin; 849 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 850 851 if (m) rmax = ailen[0]; 852 for (i=1; i<mbs; i++) { 853 /* move each row back by the amount of empty slots (fshift) before it*/ 854 fshift += imax[i-1] - ailen[i-1]; 855 rmax = PetscMax(rmax,ailen[i]); 856 if (fshift) { 857 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 858 N = ailen[i]; 859 for (j=0; j<N; j++) { 860 ip[j-fshift] = ip[j]; 861 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 862 } 863 } 864 ai[i] = ai[i-1] + ailen[i-1]; 865 } 866 if (mbs) { 867 fshift += imax[mbs-1] - ailen[mbs-1]; 868 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 869 } 870 /* reset ilen and imax for each row */ 871 for (i=0; i<mbs; i++) { 872 ailen[i] = imax[i] = ai[i+1] - ai[i]; 873 } 874 a->nz = ai[mbs]; 875 876 /* diagonals may have moved, so kill the diagonal pointers */ 877 if (fshift && a->diag) { 878 ierr = PetscFree(a->diag);CHKERRQ(ierr); 879 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 880 a->diag = 0; 881 } 882 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 883 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 884 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 885 a->reallocs = 0; 886 A->info.nz_unneeded = (PetscReal)fshift*bs2; 887 #if defined(PETSC_HAVE_DSCPACK) 888 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 889 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 890 #endif 891 892 893 PetscFunctionReturn(0); 894 } 895 896 897 898 /* 899 This function returns an array of flags which indicate the locations of contiguous 900 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 901 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 902 Assume: sizes should be long enough to hold all the values. 903 */ 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 906 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 907 { 908 int i,j,k,row; 909 PetscTruth flg; 910 911 PetscFunctionBegin; 912 for (i=0,j=0; i<n; j++) { 913 row = idx[i]; 914 if (row%bs!=0) { /* Not the begining of a block */ 915 sizes[j] = 1; 916 i++; 917 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 918 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 919 i++; 920 } else { /* Begining of the block, so check if the complete block exists */ 921 flg = PETSC_TRUE; 922 for (k=1; k<bs; k++) { 923 if (row+k != idx[i+k]) { /* break in the block */ 924 flg = PETSC_FALSE; 925 break; 926 } 927 } 928 if (flg == PETSC_TRUE) { /* No break in the bs */ 929 sizes[j] = bs; 930 i+= bs; 931 } else { 932 sizes[j] = 1; 933 i++; 934 } 935 } 936 } 937 *bs_max = j; 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 943 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 944 { 945 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 946 int ierr,i,j,k,count,is_n,*is_idx,*rows; 947 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 948 PetscScalar zero = 0.0; 949 MatScalar *aa; 950 951 PetscFunctionBegin; 952 /* Make a copy of the IS and sort it */ 953 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 954 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 955 956 /* allocate memory for rows,sizes */ 957 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 958 sizes = rows + is_n; 959 960 /* copy IS values to rows, and sort them */ 961 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 962 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 963 if (baij->keepzeroedrows) { 964 for (i=0; i<is_n; i++) { sizes[i] = 1; } 965 bs_max = is_n; 966 } else { 967 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 968 } 969 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 970 971 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 972 row = rows[j]; 973 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 974 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 975 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 976 if (sizes[i] == bs && !baij->keepzeroedrows) { 977 if (diag) { 978 if (baij->ilen[row/bs] > 0) { 979 baij->ilen[row/bs] = 1; 980 baij->j[baij->i[row/bs]] = row/bs; 981 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 982 } 983 /* Now insert all the diagonal values for this bs */ 984 for (k=0; k<bs; k++) { 985 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 986 } 987 } else { /* (!diag) */ 988 baij->ilen[row/bs] = 0; 989 } /* end (!diag) */ 990 } else { /* (sizes[i] != bs) */ 991 #if defined (PETSC_USE_DEBUG) 992 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 993 #endif 994 for (k=0; k<count; k++) { 995 aa[0] = zero; 996 aa += bs; 997 } 998 if (diag) { 999 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1000 } 1001 } 1002 } 1003 1004 ierr = PetscFree(rows);CHKERRQ(ierr); 1005 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1011 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1012 { 1013 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1014 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1015 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1016 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1017 int ridx,cidx,bs2=a->bs2,ierr; 1018 PetscTruth roworiented=a->roworiented; 1019 MatScalar *ap,value,*aa=a->a,*bap; 1020 1021 PetscFunctionBegin; 1022 for (k=0; k<m; k++) { /* loop over added rows */ 1023 row = im[k]; brow = row/bs; 1024 if (row < 0) continue; 1025 #if defined(PETSC_USE_BOPT_g) 1026 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1027 #endif 1028 rp = aj + ai[brow]; 1029 ap = aa + bs2*ai[brow]; 1030 rmax = imax[brow]; 1031 nrow = ailen[brow]; 1032 low = 0; 1033 for (l=0; l<n; l++) { /* loop over added columns */ 1034 if (in[l] < 0) continue; 1035 #if defined(PETSC_USE_BOPT_g) 1036 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1037 #endif 1038 col = in[l]; bcol = col/bs; 1039 ridx = row % bs; cidx = col % bs; 1040 if (roworiented) { 1041 value = v[l + k*n]; 1042 } else { 1043 value = v[k + l*m]; 1044 } 1045 if (!sorted) low = 0; high = nrow; 1046 while (high-low > 7) { 1047 t = (low+high)/2; 1048 if (rp[t] > bcol) high = t; 1049 else low = t; 1050 } 1051 for (i=low; i<high; i++) { 1052 if (rp[i] > bcol) break; 1053 if (rp[i] == bcol) { 1054 bap = ap + bs2*i + bs*cidx + ridx; 1055 if (is == ADD_VALUES) *bap += value; 1056 else *bap = value; 1057 goto noinsert1; 1058 } 1059 } 1060 if (nonew == 1) goto noinsert1; 1061 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1062 if (nrow >= rmax) { 1063 /* there is no extra room in row, therefore enlarge */ 1064 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1065 MatScalar *new_a; 1066 1067 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1068 1069 /* Malloc new storage space */ 1070 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1071 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1072 new_j = (int*)(new_a + bs2*new_nz); 1073 new_i = new_j + new_nz; 1074 1075 /* copy over old data into new slots */ 1076 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1077 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1078 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1079 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1080 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1081 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1082 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1083 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1084 /* free up old matrix storage */ 1085 ierr = PetscFree(a->a);CHKERRQ(ierr); 1086 if (!a->singlemalloc) { 1087 ierr = PetscFree(a->i);CHKERRQ(ierr); 1088 ierr = PetscFree(a->j);CHKERRQ(ierr); 1089 } 1090 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1091 a->singlemalloc = PETSC_TRUE; 1092 1093 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1094 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1095 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1096 a->maxnz += bs2*CHUNKSIZE; 1097 a->reallocs++; 1098 a->nz++; 1099 } 1100 N = nrow++ - 1; 1101 /* shift up all the later entries in this row */ 1102 for (ii=N; ii>=i; ii--) { 1103 rp[ii+1] = rp[ii]; 1104 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1105 } 1106 if (N>=i) { 1107 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1108 } 1109 rp[i] = bcol; 1110 ap[bs2*i + bs*cidx + ridx] = value; 1111 noinsert1:; 1112 low = i; 1113 } 1114 ailen[brow] = nrow; 1115 } 1116 PetscFunctionReturn(0); 1117 } 1118 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1122 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1123 { 1124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1125 Mat outA; 1126 int ierr; 1127 PetscTruth row_identity,col_identity; 1128 1129 PetscFunctionBegin; 1130 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1131 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1132 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1133 if (!row_identity || !col_identity) { 1134 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1135 } 1136 1137 outA = inA; 1138 inA->factor = FACTOR_LU; 1139 1140 if (!a->diag) { 1141 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1142 } 1143 1144 a->row = row; 1145 a->col = col; 1146 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1147 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1148 1149 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1150 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1151 PetscLogObjectParent(inA,a->icol); 1152 1153 /* 1154 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1155 for ILU(0) factorization with natural ordering 1156 */ 1157 if (a->bs < 8) { 1158 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1159 } else { 1160 if (!a->solve_work) { 1161 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1162 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1163 } 1164 } 1165 1166 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1167 1168 PetscFunctionReturn(0); 1169 } 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1172 int MatPrintHelp_SeqBAIJ(Mat A) 1173 { 1174 static PetscTruth called = PETSC_FALSE; 1175 MPI_Comm comm = A->comm; 1176 int ierr; 1177 1178 PetscFunctionBegin; 1179 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1180 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1181 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 1185 EXTERN_C_BEGIN 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1188 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1189 { 1190 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1191 int i,nz,nbs; 1192 1193 PetscFunctionBegin; 1194 nz = baij->maxnz/baij->bs2; 1195 nbs = baij->nbs; 1196 for (i=0; i<nz; i++) { 1197 baij->j[i] = indices[i]; 1198 } 1199 baij->nz = nz; 1200 for (i=0; i<nbs; i++) { 1201 baij->ilen[i] = baij->imax[i]; 1202 } 1203 1204 PetscFunctionReturn(0); 1205 } 1206 EXTERN_C_END 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1210 /*@ 1211 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1212 in the matrix. 1213 1214 Input Parameters: 1215 + mat - the SeqBAIJ matrix 1216 - indices - the column indices 1217 1218 Level: advanced 1219 1220 Notes: 1221 This can be called if you have precomputed the nonzero structure of the 1222 matrix and want to provide it to the matrix object to improve the performance 1223 of the MatSetValues() operation. 1224 1225 You MUST have set the correct numbers of nonzeros per row in the call to 1226 MatCreateSeqBAIJ(). 1227 1228 MUST be called before any calls to MatSetValues(); 1229 1230 @*/ 1231 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1232 { 1233 int ierr,(*f)(Mat,int *); 1234 1235 PetscFunctionBegin; 1236 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1237 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1238 if (f) { 1239 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1240 } else { 1241 SETERRQ(1,"Wrong type of matrix to set column indices"); 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1248 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1249 { 1250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1251 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1252 PetscReal atmp; 1253 PetscScalar *x,zero = 0.0; 1254 MatScalar *aa; 1255 int ncols,brow,krow,kcol; 1256 1257 PetscFunctionBegin; 1258 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1259 bs = a->bs; 1260 aa = a->a; 1261 ai = a->i; 1262 aj = a->j; 1263 mbs = a->mbs; 1264 1265 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1266 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1267 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1268 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1269 for (i=0; i<mbs; i++) { 1270 ncols = ai[1] - ai[0]; ai++; 1271 brow = bs*i; 1272 for (j=0; j<ncols; j++){ 1273 /* bcol = bs*(*aj); */ 1274 for (kcol=0; kcol<bs; kcol++){ 1275 for (krow=0; krow<bs; krow++){ 1276 atmp = PetscAbsScalar(*aa); aa++; 1277 row = brow + krow; /* row index */ 1278 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1279 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1280 } 1281 } 1282 aj++; 1283 } 1284 } 1285 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1291 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1292 { 1293 int ierr; 1294 1295 PetscFunctionBegin; 1296 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1302 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1303 { 1304 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1305 PetscFunctionBegin; 1306 *array = a->a; 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1312 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1313 { 1314 PetscFunctionBegin; 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /* -------------------------------------------------------------------*/ 1319 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1320 MatGetRow_SeqBAIJ, 1321 MatRestoreRow_SeqBAIJ, 1322 MatMult_SeqBAIJ_N, 1323 MatMultAdd_SeqBAIJ_N, 1324 MatMultTranspose_SeqBAIJ, 1325 MatMultTransposeAdd_SeqBAIJ, 1326 MatSolve_SeqBAIJ_N, 1327 0, 1328 0, 1329 0, 1330 MatLUFactor_SeqBAIJ, 1331 0, 1332 0, 1333 MatTranspose_SeqBAIJ, 1334 MatGetInfo_SeqBAIJ, 1335 MatEqual_SeqBAIJ, 1336 MatGetDiagonal_SeqBAIJ, 1337 MatDiagonalScale_SeqBAIJ, 1338 MatNorm_SeqBAIJ, 1339 0, 1340 MatAssemblyEnd_SeqBAIJ, 1341 0, 1342 MatSetOption_SeqBAIJ, 1343 MatZeroEntries_SeqBAIJ, 1344 MatZeroRows_SeqBAIJ, 1345 MatLUFactorSymbolic_SeqBAIJ, 1346 MatLUFactorNumeric_SeqBAIJ_N, 1347 0, 1348 0, 1349 MatSetUpPreallocation_SeqBAIJ, 1350 MatILUFactorSymbolic_SeqBAIJ, 1351 0, 1352 MatGetArray_SeqBAIJ, 1353 MatRestoreArray_SeqBAIJ, 1354 MatDuplicate_SeqBAIJ, 1355 0, 1356 0, 1357 MatILUFactor_SeqBAIJ, 1358 0, 1359 0, 1360 MatGetSubMatrices_SeqBAIJ, 1361 MatIncreaseOverlap_SeqBAIJ, 1362 MatGetValues_SeqBAIJ, 1363 0, 1364 MatPrintHelp_SeqBAIJ, 1365 MatScale_SeqBAIJ, 1366 0, 1367 0, 1368 0, 1369 MatGetBlockSize_SeqBAIJ, 1370 MatGetRowIJ_SeqBAIJ, 1371 MatRestoreRowIJ_SeqBAIJ, 1372 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 0, 1378 MatSetValuesBlocked_SeqBAIJ, 1379 MatGetSubMatrix_SeqBAIJ, 1380 MatDestroy_SeqBAIJ, 1381 MatView_SeqBAIJ, 1382 MatGetPetscMaps_Petsc, 1383 0, 1384 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 MatGetRowMax_SeqBAIJ, 1390 MatConvert_Basic}; 1391 1392 EXTERN_C_BEGIN 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1395 int MatStoreValues_SeqBAIJ(Mat mat) 1396 { 1397 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1398 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1399 int ierr; 1400 1401 PetscFunctionBegin; 1402 if (aij->nonew != 1) { 1403 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1404 } 1405 1406 /* allocate space for values if not already there */ 1407 if (!aij->saved_values) { 1408 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1409 } 1410 1411 /* copy values over */ 1412 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 EXTERN_C_END 1416 1417 EXTERN_C_BEGIN 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1420 int MatRetrieveValues_SeqBAIJ(Mat mat) 1421 { 1422 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1423 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1424 1425 PetscFunctionBegin; 1426 if (aij->nonew != 1) { 1427 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1428 } 1429 if (!aij->saved_values) { 1430 SETERRQ(1,"Must call MatStoreValues(A);first"); 1431 } 1432 1433 /* copy values over */ 1434 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 EXTERN_C_END 1438 1439 EXTERN_C_BEGIN 1440 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1441 EXTERN_C_END 1442 1443 EXTERN_C_BEGIN 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatCreate_SeqBAIJ" 1446 int MatCreate_SeqBAIJ(Mat B) 1447 { 1448 int ierr,size; 1449 Mat_SeqBAIJ *b; 1450 1451 PetscFunctionBegin; 1452 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1453 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1454 1455 B->m = B->M = PetscMax(B->m,B->M); 1456 B->n = B->N = PetscMax(B->n,B->N); 1457 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1458 B->data = (void*)b; 1459 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1460 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1461 B->factor = 0; 1462 B->lupivotthreshold = 1.0; 1463 B->mapping = 0; 1464 b->row = 0; 1465 b->col = 0; 1466 b->icol = 0; 1467 b->reallocs = 0; 1468 b->saved_values = 0; 1469 #if defined(PETSC_USE_MAT_SINGLE) 1470 b->setvalueslen = 0; 1471 b->setvaluescopy = PETSC_NULL; 1472 #endif 1473 b->single_precision_solves = PETSC_FALSE; 1474 1475 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1476 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1477 1478 b->sorted = PETSC_FALSE; 1479 b->roworiented = PETSC_TRUE; 1480 b->nonew = 0; 1481 b->diag = 0; 1482 b->solve_work = 0; 1483 b->mult_work = 0; 1484 b->spptr = 0; 1485 B->info.nz_unneeded = (PetscReal)b->maxnz; 1486 b->keepzeroedrows = PETSC_FALSE; 1487 1488 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1489 "MatStoreValues_SeqBAIJ", 1490 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1492 "MatRetrieveValues_SeqBAIJ", 1493 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1494 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1495 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1496 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1497 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1498 "MatConvert_SeqBAIJ_SeqAIJ", 1499 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1500 PetscFunctionReturn(0); 1501 } 1502 EXTERN_C_END 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1506 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1507 { 1508 Mat C; 1509 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1510 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1511 1512 PetscFunctionBegin; 1513 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1514 1515 *B = 0; 1516 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1517 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1518 c = (Mat_SeqBAIJ*)C->data; 1519 1520 c->bs = a->bs; 1521 c->bs2 = a->bs2; 1522 c->mbs = a->mbs; 1523 c->nbs = a->nbs; 1524 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1525 1526 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1527 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1528 for (i=0; i<mbs; i++) { 1529 c->imax[i] = a->imax[i]; 1530 c->ilen[i] = a->ilen[i]; 1531 } 1532 1533 /* allocate the matrix space */ 1534 c->singlemalloc = PETSC_TRUE; 1535 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1536 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1537 c->j = (int*)(c->a + nz*bs2); 1538 c->i = c->j + nz; 1539 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1540 if (mbs > 0) { 1541 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1542 if (cpvalues == MAT_COPY_VALUES) { 1543 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1544 } else { 1545 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1546 } 1547 } 1548 1549 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1550 c->sorted = a->sorted; 1551 c->roworiented = a->roworiented; 1552 c->nonew = a->nonew; 1553 1554 if (a->diag) { 1555 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1556 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1557 for (i=0; i<mbs; i++) { 1558 c->diag[i] = a->diag[i]; 1559 } 1560 } else c->diag = 0; 1561 c->nz = a->nz; 1562 c->maxnz = a->maxnz; 1563 c->solve_work = 0; 1564 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1565 c->mult_work = 0; 1566 C->preallocated = PETSC_TRUE; 1567 C->assembled = PETSC_TRUE; 1568 *B = C; 1569 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 EXTERN_C_BEGIN 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatLoad_SeqBAIJ" 1576 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1577 { 1578 Mat_SeqBAIJ *a; 1579 Mat B; 1580 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1581 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1582 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1583 int *masked,nmask,tmp,bs2,ishift; 1584 PetscScalar *aa; 1585 MPI_Comm comm = ((PetscObject)viewer)->comm; 1586 1587 PetscFunctionBegin; 1588 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1589 bs2 = bs*bs; 1590 1591 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1592 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1593 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1594 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1595 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1596 M = header[1]; N = header[2]; nz = header[3]; 1597 1598 if (header[3] < 0) { 1599 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1600 } 1601 1602 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1603 1604 /* 1605 This code adds extra rows to make sure the number of rows is 1606 divisible by the blocksize 1607 */ 1608 mbs = M/bs; 1609 extra_rows = bs - M + bs*(mbs); 1610 if (extra_rows == bs) extra_rows = 0; 1611 else mbs++; 1612 if (extra_rows) { 1613 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1614 } 1615 1616 /* read in row lengths */ 1617 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1618 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1619 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1620 1621 /* read in column indices */ 1622 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1623 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1624 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1625 1626 /* loop over row lengths determining block row lengths */ 1627 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1628 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1629 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1630 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1631 masked = mask + mbs; 1632 rowcount = 0; nzcount = 0; 1633 for (i=0; i<mbs; i++) { 1634 nmask = 0; 1635 for (j=0; j<bs; j++) { 1636 kmax = rowlengths[rowcount]; 1637 for (k=0; k<kmax; k++) { 1638 tmp = jj[nzcount++]/bs; 1639 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1640 } 1641 rowcount++; 1642 } 1643 browlengths[i] += nmask; 1644 /* zero out the mask elements we set */ 1645 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1646 } 1647 1648 /* create our matrix */ 1649 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1650 B = *A; 1651 a = (Mat_SeqBAIJ*)B->data; 1652 1653 /* set matrix "i" values */ 1654 a->i[0] = 0; 1655 for (i=1; i<= mbs; i++) { 1656 a->i[i] = a->i[i-1] + browlengths[i-1]; 1657 a->ilen[i-1] = browlengths[i-1]; 1658 } 1659 a->nz = 0; 1660 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1661 1662 /* read in nonzero values */ 1663 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1664 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1665 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1666 1667 /* set "a" and "j" values into matrix */ 1668 nzcount = 0; jcount = 0; 1669 for (i=0; i<mbs; i++) { 1670 nzcountb = nzcount; 1671 nmask = 0; 1672 for (j=0; j<bs; j++) { 1673 kmax = rowlengths[i*bs+j]; 1674 for (k=0; k<kmax; k++) { 1675 tmp = jj[nzcount++]/bs; 1676 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1677 } 1678 } 1679 /* sort the masked values */ 1680 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1681 1682 /* set "j" values into matrix */ 1683 maskcount = 1; 1684 for (j=0; j<nmask; j++) { 1685 a->j[jcount++] = masked[j]; 1686 mask[masked[j]] = maskcount++; 1687 } 1688 /* set "a" values into matrix */ 1689 ishift = bs2*a->i[i]; 1690 for (j=0; j<bs; j++) { 1691 kmax = rowlengths[i*bs+j]; 1692 for (k=0; k<kmax; k++) { 1693 tmp = jj[nzcountb]/bs ; 1694 block = mask[tmp] - 1; 1695 point = jj[nzcountb] - bs*tmp; 1696 idx = ishift + bs2*block + j + bs*point; 1697 a->a[idx] = (MatScalar)aa[nzcountb++]; 1698 } 1699 } 1700 /* zero out the mask elements we set */ 1701 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1702 } 1703 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1704 1705 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1706 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1707 ierr = PetscFree(aa);CHKERRQ(ierr); 1708 ierr = PetscFree(jj);CHKERRQ(ierr); 1709 ierr = PetscFree(mask);CHKERRQ(ierr); 1710 1711 B->assembled = PETSC_TRUE; 1712 1713 ierr = MatView_Private(B);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 EXTERN_C_END 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatCreateSeqBAIJ" 1720 /*@C 1721 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1722 compressed row) format. For good matrix assembly performance the 1723 user should preallocate the matrix storage by setting the parameter nz 1724 (or the array nnz). By setting these parameters accurately, performance 1725 during matrix assembly can be increased by more than a factor of 50. 1726 1727 Collective on MPI_Comm 1728 1729 Input Parameters: 1730 + comm - MPI communicator, set to PETSC_COMM_SELF 1731 . bs - size of block 1732 . m - number of rows 1733 . n - number of columns 1734 . nz - number of nonzero blocks per block row (same for all rows) 1735 - nnz - array containing the number of nonzero blocks in the various block rows 1736 (possibly different for each block row) or PETSC_NULL 1737 1738 Output Parameter: 1739 . A - the matrix 1740 1741 Options Database Keys: 1742 . -mat_no_unroll - uses code that does not unroll the loops in the 1743 block calculations (much slower) 1744 . -mat_block_size - size of the blocks to use 1745 1746 Level: intermediate 1747 1748 Notes: 1749 A nonzero block is any block that as 1 or more nonzeros in it 1750 1751 The block AIJ format is fully compatible with standard Fortran 77 1752 storage. That is, the stored row and column indices can begin at 1753 either one (as in Fortran) or zero. See the users' manual for details. 1754 1755 Specify the preallocated storage with either nz or nnz (not both). 1756 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1757 allocation. For additional details, see the users manual chapter on 1758 matrices. 1759 1760 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1761 @*/ 1762 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1763 { 1764 int ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1768 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1769 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1770 PetscFunctionReturn(0); 1771 } 1772 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1775 /*@C 1776 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1777 per row in the matrix. For good matrix assembly performance the 1778 user should preallocate the matrix storage by setting the parameter nz 1779 (or the array nnz). By setting these parameters accurately, performance 1780 during matrix assembly can be increased by more than a factor of 50. 1781 1782 Collective on MPI_Comm 1783 1784 Input Parameters: 1785 + A - the matrix 1786 . bs - size of block 1787 . nz - number of block nonzeros per block row (same for all rows) 1788 - nnz - array containing the number of block nonzeros in the various block rows 1789 (possibly different for each block row) or PETSC_NULL 1790 1791 Options Database Keys: 1792 . -mat_no_unroll - uses code that does not unroll the loops in the 1793 block calculations (much slower) 1794 . -mat_block_size - size of the blocks to use 1795 1796 Level: intermediate 1797 1798 Notes: 1799 The block AIJ format is fully compatible with standard Fortran 77 1800 storage. That is, the stored row and column indices can begin at 1801 either one (as in Fortran) or zero. See the users' manual for details. 1802 1803 Specify the preallocated storage with either nz or nnz (not both). 1804 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1805 allocation. For additional details, see the users manual chapter on 1806 matrices. 1807 1808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1809 @*/ 1810 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1811 { 1812 Mat_SeqBAIJ *b; 1813 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1814 PetscTruth flg; 1815 1816 PetscFunctionBegin; 1817 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1818 if (!flg) PetscFunctionReturn(0); 1819 1820 B->preallocated = PETSC_TRUE; 1821 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1822 if (nnz && newbs != bs) { 1823 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1824 } 1825 bs = newbs; 1826 1827 mbs = B->m/bs; 1828 nbs = B->n/bs; 1829 bs2 = bs*bs; 1830 1831 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1832 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1833 } 1834 1835 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1836 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1837 if (nnz) { 1838 for (i=0; i<mbs; i++) { 1839 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1840 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1841 } 1842 } 1843 1844 b = (Mat_SeqBAIJ*)B->data; 1845 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1846 B->ops->solve = MatSolve_SeqBAIJ_Update; 1847 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1848 if (!flg) { 1849 switch (bs) { 1850 case 1: 1851 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1852 B->ops->mult = MatMult_SeqBAIJ_1; 1853 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1854 break; 1855 case 2: 1856 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1857 B->ops->mult = MatMult_SeqBAIJ_2; 1858 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1859 break; 1860 case 3: 1861 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1862 B->ops->mult = MatMult_SeqBAIJ_3; 1863 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1864 break; 1865 case 4: 1866 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1867 B->ops->mult = MatMult_SeqBAIJ_4; 1868 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1869 break; 1870 case 5: 1871 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1872 B->ops->mult = MatMult_SeqBAIJ_5; 1873 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1874 break; 1875 case 6: 1876 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1877 B->ops->mult = MatMult_SeqBAIJ_6; 1878 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1879 break; 1880 case 7: 1881 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1882 B->ops->mult = MatMult_SeqBAIJ_7; 1883 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1884 break; 1885 default: 1886 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1887 B->ops->mult = MatMult_SeqBAIJ_N; 1888 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1889 break; 1890 } 1891 } 1892 b->bs = bs; 1893 b->mbs = mbs; 1894 b->nbs = nbs; 1895 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1896 if (!nnz) { 1897 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1898 else if (nz <= 0) nz = 1; 1899 for (i=0; i<mbs; i++) b->imax[i] = nz; 1900 nz = nz*mbs; 1901 } else { 1902 nz = 0; 1903 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1904 } 1905 1906 /* allocate the matrix space */ 1907 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1908 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1909 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1910 b->j = (int*)(b->a + nz*bs2); 1911 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1912 b->i = b->j + nz; 1913 b->singlemalloc = PETSC_TRUE; 1914 1915 b->i[0] = 0; 1916 for (i=1; i<mbs+1; i++) { 1917 b->i[i] = b->i[i-1] + b->imax[i-1]; 1918 } 1919 1920 /* b->ilen will count nonzeros in each block row so far. */ 1921 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1922 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1923 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1924 1925 b->bs = bs; 1926 b->bs2 = bs2; 1927 b->mbs = mbs; 1928 b->nz = 0; 1929 b->maxnz = nz*bs2; 1930 B->info.nz_unneeded = (PetscReal)b->maxnz; 1931 PetscFunctionReturn(0); 1932 } 1933 1934