# Phase parental genotypes from parent/parent/child trios.
#
# Usage: <this script> PEDFILE        (reads standard input when PEDFILE is omitted)
#
# Input: whitespace-separated records, three consecutive records per trio.
#   column 1       pedigree name (copied to the output)
#   column 3       a value > 0 marks the record as the child of the trio
#                  (presumably the father id of a linkage-format file -- confirm)
#   columns 7..    genotypes, two allele columns per marker; allele 0 = missing
#
# Output: one line per parental chromosome, "<pedigree>\t<allele> <allele> ...".
#   All transmitted chromosomes are printed first, followed by all
#   untransmitted chromosomes, in the same parent order.  An allele is printed
#   as 0 when it cannot be determined and as "h" when both parents and the
#   child are heterozygous (phase ambiguous).
#
# The number of markers read per record defaults to 103 and can be overridden
# through the NMARKERS environment variable.

nmark="${NMARKERS:-103}"

# Stage 1: reorder every trio so that the child record comes last.
awk '
    {
        slot = (NR - 1) % 3 + 1
        rec[slot] = $0
        # When several records qualify, the last one wins.
        if ($3 > 0)
            child = slot
    }

    NR % 3 == 0 {
        if (child == 0) {
            # Without an identifiable child the trio cannot be phased.
            printf("warning: no child record in input lines %d-%d; trio skipped\n",
                   NR - 2, NR) > "/dev/stderr"
        } else {
            for (k = 1; k <= 3; k++)
                if (k != child)
                    print rec[k]
            print rec[child]
        }
        # Reset so a later trio can never inherit this child position.
        child = 0
    }
' "${1:--}" |

# Stage 2: phase both parents of every trio against the child.
awk -v nmark="$nmark" '
    # Copy the allele pairs of the current record into dst[marker, 1..2].
    function read_genotypes(dst,    i, col) {
        for (i = 1; i <= nmark; i++) {
            col = 7 + (i - 1) * 2
            dst[i, 1] = $col
            dst[i, 2] = $(col + 1)
        }
    }

    # Fill trans[] / untrans[] with the allele that parent par passed on /
    # did not pass on to child ch, using the other parent oth to resolve phase.
    function phase_parent(par, oth, ch, trans, untrans,    i) {
        for (i = 1; i <= nmark; i++) {
            if (par[i, 1] == 0 || par[i, 2] == 0) {
                # parent genotype missing
                trans[i] = 0
                untrans[i] = 0
            } else if (par[i, 1] == par[i, 2]) {
                # homozygous parent: both chromosomes carry the same allele
                trans[i] = par[i, 1]
                untrans[i] = par[i, 1]
            } else if (ch[i, 1] == 0 || ch[i, 2] == 0) {
                # heterozygous parent, child genotype missing
                trans[i] = 0
                untrans[i] = 0
            } else if (oth[i, 1] == 0 || oth[i, 2] == 0) {
                # Other parent missing.  For a homozygous child the allele
                # could still be inferred, but it is deliberately reported as
                # unknown so the output matches the reference data from Daly.
                trans[i] = 0
                untrans[i] = 0
            } else if (ch[i, 1] == ch[i, 2]) {
                # homozygous child: the transmitted allele is the child allele
                trans[i] = ch[i, 1]
                untrans[i] = (par[i, 1] == ch[i, 1]) ? par[i, 2] : par[i, 1]
            } else if (oth[i, 1] == oth[i, 2]) {
                # heterozygous child, homozygous other parent: the child allele
                # that did not come from the other parent came from this one
                if (ch[i, 1] == oth[i, 1]) {
                    trans[i] = ch[i, 2]
                    untrans[i] = ch[i, 1]
                } else {
                    trans[i] = ch[i, 1]
                    untrans[i] = ch[i, 2]
                }
            } else {
                # everybody heterozygous: phase cannot be resolved
                trans[i] = "h"
                untrans[i] = "h"
            }
        }
    }

    # Print one chromosome: pedigree name, a tab, then space-terminated alleles.
    function print_chromosome(name, hap,    i) {
        printf("%s\t", name)
        for (i = 1; i <= nmark; i++)
            printf("%s ", hap[i])
        printf("\n")
    }

    NR % 3 == 1 { pedname = $1; read_genotypes(ma) }
    NR % 3 == 2 { read_genotypes(pa) }
    NR % 3 == 0 {
        read_genotypes(kid)

        phase_parent(ma, pa, kid, mat, mau)
        print_chromosome(pedname, mat)
        print_chromosome(pedname, mau)

        phase_parent(pa, ma, kid, pat, pau)
        print_chromosome(pedname, pat)
        print_chromosome(pedname, pau)
    }
' |

# Stage 3: stage 2 alternates transmitted / untransmitted lines; regroup them
# so that every transmitted chromosome precedes every untransmitted one.
awk '
    NR % 2 == 1 { trans[++t] = $0 }
    NR % 2 == 0 { untrans[++u] = $0 }
    END {
        # Emit exactly what was read instead of a fixed count of 258, so the
        # output is neither blank-padded nor truncated for other input sizes.
        for (i = 1; i <= t; i++)
            print trans[i]
        for (i = 1; i <= u; i++)
            print untrans[i]
    }
'