libStatGen Software 1
Pedigree.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "Pedigree.h"
19#include "GenotypeLists.h"
20#include "MemoryInfo.h"
21#include "Constant.h"
22#include "Error.h"
23#include "Sort.h"
24
25#include <stdlib.h>
26
27bool Pedigree::sexAsCovariate = false;
28String Pedigree::missing("-99.999");
29
30Pedigree::Pedigree() : pd()
31{
32 haveTwins = count = 0;
33 size = 10000;
34 persons = new Person *[size];
35 familyCount = 0;
36 families = new Family * [1];
37 multiPd = NULL;
38 multiFileCount = 0;
39}
40
41Pedigree::~Pedigree()
42{
43 for (int i = 0; i < count; i++)
44 delete persons[i];
45
46 for (int i = 0; i < familyCount; i++)
47 delete families[i];
48
49 delete [] families;
50 delete [] persons;
51
52 if (multiPd != NULL)
53 delete [] multiPd;
54}
55
56void Pedigree::Sort()
57{
58 QuickSort(persons, count, sizeof(Person *),
59 COMPAREFUNC Pedigree::ComparePersons);
60
61 haveTwins = 0;
62
63 // Check for structural problems in input pedigree
64 bool problem = false;
65
66 // Check that we have no duplicates...
67 for (int i = 1; i < count; i++)
68 if (ComparePersons((const Person **) &persons[i-1],
69 (const Person **) &persons[i]) == 0)
70 {
71 printf("Family %s: Person %s is duplicated\n",
72 (const char *) persons[i]->famid,
73 (const char *) persons[i]->pid);
74 problem = true;
75
76 do
77 {
78 i++;
79 }
80 while (i < count &&
81 ComparePersons((const Person **) &persons[i-1],
82 (const Person **) &persons[i]) == 0);
83 }
84
85 // Assign parents...
86 for (int i = 0; i < count; i++)
87 {
88 persons[i]->serial = i;
89 persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid);
90 persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid);
91
92 problem |= !persons[i]->CheckParents();
93
94 persons[i]->AssessStatus();
95
96 // Check if we have any twins...
97 haveTwins |= persons[i]->zygosity;
98 }
99
100 if (problem)
101 error("Please correct problems with pedigree structure\n");
102
103 MakeSibships();
104 MakeFamilies();
105}
106
107void Pedigree::MakeSibships()
108{
109 Person ** sibs = new Person * [count];
110 for (int i = 0; i < count; i++)
111 sibs[i] = persons[i];
112
113 QuickSort(sibs, count, sizeof(Person *),
114 COMPAREFUNC Pedigree::CompareParents);
115
116 for (int first = 0; first < count; first++)
117 if (!sibs[first]->isFounder())
118 {
119 int last = first + 1;
120 while (last < count)
121 if (sibs[first]-> mother != sibs[last]->mother ||
122 sibs[first]-> father != sibs[last]->father)
123 break;
124 else last++;
125 last --;
126
127 for (int j = first; j <= last; j++)
128 {
129 if (sibs[j]->sibCount) delete [] sibs[j]->sibs;
130 sibs[j]->sibCount = last - first + 1;
131 sibs[j]->sibs = new Person * [sibs[j]->sibCount];
132 for (int k = first; k <= last; k++)
133 sibs[j]->sibs[k - first] = sibs[k];
134 }
135 first = last;
136 }
137 delete [] sibs;
138}
139
140void Pedigree::MakeFamilies()
141{
142 for (int i = 0; i < familyCount; i++)
143 delete families[i];
144 delete [] families;
145
146 familyCount = 0;
147 families = new Family * [count];
148
149 for (int first=0; first < count; first++)
150 {
151 int last = first;
152 while (last < count)
153 if (SlowCompare(persons[first]->famid, persons[last]->famid) == 0)
154 last++;
155 else break;
156
157 families[familyCount] = new Family(*this, first, --last, familyCount);
158
159 first = last;
160 familyCount++;
161 }
162}
163
164// Utility functions for finding a person in a pedigree
165
167{
168 const char * famid;
169 const char * pid;
170};
171
172int CompareKeyToPerson(PedigreeKey * key, Person ** p)
173{
174 int result = SlowCompare(key->famid, (**p).famid);
175
176 if (result != 0)
177 return result;
178
179 return SlowCompare(key->pid, (**p).pid);
180}
181
182int CompareKeyToFamily(PedigreeKey * key, Family ** f)
183{
184 return SlowCompare(key->famid, (**f).famid);
185}
186
187Person * Pedigree::FindPerson(const char * famid, const char * pid)
188{
189 PedigreeKey key;
190 key.famid = famid;
191 key.pid = pid;
192
193 Person ** result = (Person **) BinarySearch
194 (&key, persons, count, sizeof(Person *),
195 COMPAREFUNC CompareKeyToPerson);
196
197 return (result == NULL) ? (Person *) NULL : *result;
198}
199
200Person * Pedigree::FindPerson(const char *famid, const char *pid, int universe)
201{
202 PedigreeKey key;
203 key.famid = famid;
204 key.pid = pid;
205
206 Person ** result = (Person **) BinarySearch
207 (&key, persons, universe, sizeof(Person *),
208 COMPAREFUNC CompareKeyToPerson);
209
210 return (result == NULL) ? (Person *) NULL : *result;
211}
212
213Family * Pedigree::FindFamily(const char * famid)
214{
215 PedigreeKey key;
216 key.famid = famid;
217
218 Family ** result = (Family **) BinarySearch
219 (&key, families, familyCount, sizeof(Family *),
220 COMPAREFUNC CompareKeyToFamily);
221
222 return (result == NULL) ? (Family *) NULL : *result;
223}
224
225int Pedigree::CountAlleles(int marker)
226{
227 return ::CountAlleles(*this, marker);
228}
229
230void Pedigree::LumpAlleles(double min, bool reorder)
231{
232 if (min > 0.0)
233 printf("Lumping alleles with frequencies of %.2f or less...\n\n", min);
234
235 for (int m=0; m < markerCount; m++)
236 ::LumpAlleles(*this, m, min, reorder);
237}
238
239void Pedigree::EstimateFrequencies(int estimator, bool quiet)
240{
241 bool estimated = false;
242 int line = 3;
243
244 const char * estimators[] =
245 { "using all genotypes", "using founder genotypes", "assumed equal" };
246
247 bool condensed = markerCount > 100;
248 int grain = markerCount / 50, estimates = 0;
249
250 for (int m=0; m < markerCount; m++)
251 if (::EstimateFrequencies(*this, m, estimator))
252 if (!quiet)
253 {
254 if (!estimated)
255 printf("Estimating allele frequencies... [%s]\n ",
256 estimators[estimator]), estimated = true;
257
258 if (condensed)
259 {
260 if (estimates++ % grain == 0)
261 {
262 printf(".");
263 fflush(stdout);
264 }
265 continue;
266 }
267
268 if (line + markerNames[m].Length() + 1 > 79)
269 printf("\n "), line = 3;
270
271 printf("%s ", (const char *) markerNames[m]);
272 line += markerNames[m].Length() + 1;
273 }
274
275 if (estimated)
276 printf(condensed ? "\nDone estimating frequencies for %d markers\n\n" : "\n\n", estimates);
277}
278
279int Pedigree::ComparePersons(const Person ** p1, const Person ** p2)
280{
281 int result = SlowCompare((**p1).famid, (**p2).famid);
282
283 if (result != 0) return result;
284
285 return SlowCompare((**p1).pid, (**p2).pid);
286}
287
288int Pedigree::CompareParents(const Person ** p1, const Person ** p2)
289{
290 int result = SlowCompare((**p1).famid, (**p2).famid);
291
292 if (result) return result;
293
294 result = SlowCompare((**p1).fatid, (**p2).fatid);
295
296 if (result) return result;
297
298 return SlowCompare((**p1).motid, (**p2).motid);
299}
300
301void Pedigree::Grow()
302{
303 size *= 2;
304
305 Person ** temp = new Person * [size];
306 if (temp == NULL) error("Out of memory");
307
308 for (int i=0; i<count; i++)
309 temp[i] = persons[i];
310
311 delete [] persons;
312 persons = temp;
313}
314
315void Pedigree::Add(Person & rhs)
316{
317 if (count == size)
318 Grow();
319
320 persons[count] = new Person();
321 persons[count++]->Copy(rhs);
322}
323
324void Pedigree::WriteDataFile(FILE * output)
325{
326 // write in the following order:
327 // markers, traits, affections, covariates
328
329 if (haveTwins)
330 fprintf(output, " Z Zygosity\n");
331
332 for (int m = 0; m < markerCount; m++)
333 fprintf(output, " M %s\n", (const char *) markerNames[m]);
334
335 for (int t = 0; t < traitCount; t++)
336 fprintf(output, " T %s\n", (const char *) traitNames[t]);
337
338 for (int a = 0; a < affectionCount; a++)
339 fprintf(output, " A %s\n", (const char *) affectionNames[a]);
340
341 for (int c = 0; c < covariateCount; c++)
342 fprintf(output, " C %s\n", (const char *) covariateNames[c]);
343
344 for (int s = 0; s < stringCount; s++)
345 fprintf(output, " $ %s\n", (const char *) stringNames[s]);
346
347 fprintf(output, " E END-OF-DATA \n");
348}
349
350void Pedigree::WritePedigreeFile(FILE * output)
351{
352 MarkerInfo ** info = new MarkerInfo * [markerCount];
353
354 for (int i = 0; i < markerCount; i++)
355 info[i] = GetMarkerInfo(i);
356
357 for (int i = 0; i < count; i++)
358 WriteRecodedPerson(output, i, info);
359 fprintf(output, "end\n");
360
361 delete [] info;
362}
363
364void Pedigree::WritePerson(FILE * output, int person, const char * famid,
365 const char * pid, const char * fatid, const char * motid)
366{
367 WriteRecodedPerson(output, person, NULL, famid, pid, fatid, motid);
368}
369
370void Pedigree::WriteRecodedPerson(
371 FILE * output, int person, MarkerInfo ** markerInfo,
372 const char * famid, const char * pid, const char * fatid,
373 const char * motid)
374{
375 Person * p = persons[person];
376
377 if (famid == NULL) famid = p->famid;
378 if (pid == NULL) pid = p->pid;
379 if (fatid == NULL) fatid = p->fatid;
380 if (motid == NULL) motid = p->motid;
381
382 // write in the following order:
383 // markers, traits, affections, covariates
384
385 fprintf(output, "%s\t%s\t%s\t%s\t%d\t",
386 famid, pid, fatid, motid, p->sex);
387
388 const char * twinCodes[] = {"0", "MZ", "DZ"};
389
390 if (haveTwins)
391 {
392 if (p->zygosity <= 2)
393 fprintf(output, "%s\t", twinCodes[p->zygosity]);
394 else
395 fprintf(output, "%d\t", p->zygosity);
396 }
397
398 for (int m = 0; m < markerCount; m++)
399 if (markerInfo == NULL)
400 fprintf(output, markerCount < 20 ? "%3d/%3d\t" : "%d/%d\t",
401 p->markers[m][0], p->markers[m][1]);
402 else
403 fprintf(output, markerCount < 20 ? "%3s/%3s\t" : "%s/%s\t",
404 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][0]),
405 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][1]));
406
407 for (int t = 0; t < traitCount; t++)
408 if (p->isPhenotyped(t))
409 fprintf(output, "%.3f\t", p->traits[t]);
410 else
411 fprintf(output, "x\t");
412
413 for (int a = 0; a < affectionCount; a++)
414 if (p->isDiagnosed(a))
415 fprintf(output, "%d\t", p->affections[a]);
416 else
417 fprintf(output, "x\t");
418
419 for (int c = 0; c < covariateCount; c++)
420 if (p->isControlled(c))
421 fprintf(output, "%.3f\t", p->covariates[c]);
422 else
423 fprintf(output, "x\t");
424
425 for (int s = 0; s < stringCount; s++)
426 if (!p->strings[s].IsEmpty())
427 fprintf(output, "%s\t", (const char *) p->strings[s]);
428 else
429 fprintf(output, ".\t");
430
431 fprintf(output, "\n");
432}
433
434void Pedigree::WriteDataFile(const char * output)
435{
436 FILE * f = fopen(output, "wt");
437 if (f == NULL) error("Couldn't open data file %s", output);
438 WriteDataFile(f);
439 fclose(f);
440}
441
442void Pedigree::WritePedigreeFile(const char * output)
443{
444 FILE * f = fopen(output, "wt");
445 if (f == NULL) error("Couldn't open pedigree file %s", output);
446 WritePedigreeFile(f);
447 fclose(f);
448}
449
450void Pedigree::PrepareDichotomization()
451{
452
453 for (int t = 0; t < traitCount; t++)
454 {
455 String new_affection = traitNames[t] + "*";
456 GetAffectionID(new_affection);
457 }
458}
459
460int Pedigree::Dichotomize(int t, double mean)
461{
462 String new_affection = traitNames[t] + "*";
463
464 int af = GetAffectionID(new_affection);
465
466 if (mean == _NAN_)
467 {
468 mean = 0.0;
469 double dcount = 0;
470 for (int i = 0; i < count; i++)
471 if (persons[i]->isPhenotyped(t) &&
472 !persons[i]->isFounder())
473 {
474 mean += persons[i]->traits[t];
475 dcount ++;
476 }
477
478 if (!dcount) return af;
479
480 mean /= dcount;
481 }
482
483 printf("Dichotomizing %s around mean of %.3f ...\n",
484 (const char *) traitNames[t], mean);
485
486 for (int i = 0; i < count; i++)
487 if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder())
488 persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1;
489 else
490 persons[i]->affections[af] = 0;
491
492 Sort();
493
494 return af;
495}
496
497void Pedigree::DichotomizeAll(double mean)
498{
499 for (int t = 0; t < traitCount; t++)
500 Dichotomize(t, mean);
501}
502
503bool Pedigree::InheritanceCheck(bool abortIfInconsistent)
504{
505 bool fail = false;
506
507 if (haveTwins) fail |= TwinCheck();
508
509 if (chromosomeX)
510 fail |= SexLinkedCheck();
511 else
512 fail |= AutosomalCheck();
513
514 if (fail && abortIfInconsistent)
515 error("Mendelian inheritance errors detected\n");
516
517 return !fail;
518}
519
520bool Pedigree::AutosomalCheck()
521{
522 // Arrays indicating which alleles and homozygotes occur
523 IntArray haplos, genos, counts, failedFamilies;
524
525 bool fail = false;
526
527 // For each marker ...
528 for (int m = 0; m < markerCount; m++)
529 {
530 MarkerInfo * info = GetMarkerInfo(m);
531
532 // Summary for marker
533 int alleleCount = CountAlleles(m);
534 int genoCount = alleleCount * (alleleCount + 1) / 2;
535
536 // Initialize arrays
537 haplos.Dimension(alleleCount + 1);
538 haplos.Set(-1);
539
540 genos.Dimension(genoCount + 1);
541 genos.Set(-1);
542
543 failedFamilies.Dimension(familyCount);
544 failedFamilies.Zero();
545
546 counts.Dimension(alleleCount + 1);
547
548 for (int f = 0; f < familyCount; f++)
549 for (int i = families[f]->first; i <= families[f]->last; i++)
550 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
551 {
552 // This loop runs once per sibship
553 Alleles fat = persons[i]->father->markers[m];
554 Alleles mot = persons[i]->mother->markers[m];
555 bool fgeno = fat.isKnown();
556 bool mgeno = mot.isKnown();
557
558 // Number of alleles, homozygotes and genotypes in this sibship
559 int haplo = 0, homo = 0, diplo = 0;
560
561 // No. of different genotypes per allele
562 counts.Zero();
563
564 // In general, there should be no more than 3 genotypes per allele
565 bool too_many_genos = false;
566
567 for (int j = 0; j < persons[i]->sibCount; j++)
568 if (persons[i]->sibs[j]->isGenotyped(m))
569 {
570 Alleles geno = persons[i]->sibs[j]->markers[m];
571
572 int fat1 = fat.hasAllele(geno.one);
573 int fat2 = fat.hasAllele(geno.two);
574 int mot1 = mot.hasAllele(geno.one);
575 int mot2 = mot.hasAllele(geno.two);
576
577 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
578 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
579 {
580 printf("%s - Fam %s: Child %s [%s/%s] has ",
581 (const char *) markerNames[m],
582 (const char *) persons[i]->sibs[j]->famid,
583 (const char *) persons[i]->sibs[j]->pid,
584 (const char *) info->GetAlleleLabel(geno.one),
585 (const char *) info->GetAlleleLabel(geno.two));
586
587 if (!fgeno || !mgeno)
588 printf("%s [%s/%s]\n",
589 fgeno ? "father" : "mother",
590 (const char *) info->GetAlleleLabel(fgeno ? fat.one : mot.one),
591 (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two));
592 else
593 printf("parents [%s/%s]*[%s/%s]\n",
594 (const char *) info->GetAlleleLabel(fat.one),
595 (const char *) info->GetAlleleLabel(fat.two),
596 (const char *) info->GetAlleleLabel(mot.one),
597 (const char *) info->GetAlleleLabel(mot.two));
598
599 fail = true;
600 failedFamilies[f] = true;
601 }
602 else
603 {
604 if (haplos[geno.one] != i)
605 {
606 haplo++;
607 haplos[geno.one] = i;
608 };
609 if (haplos[geno.two] != i)
610 {
611 haplo++;
612 haplos[geno.two] = i;
613 };
614
615 int index = geno.SequenceCoded();
616
617 if (genos[index] != i)
618 {
619 genos[index] = i;
620 diplo++;
621 counts[geno.one]++;
622 if (geno.isHomozygous())
623 homo++;
624 else
625 counts[geno.two]++;
626 if (counts[geno.one] > 2) too_many_genos = true;
627 if (counts[geno.two] > 2) too_many_genos = true;
628 }
629 }
630 }
631
632 if (fgeno)
633 {
634 if (haplos[fat.one] != i)
635 {
636 haplo++;
637 haplos[fat.one] = i;
638 }
639 if (haplos[fat.two] != i)
640 {
641 haplo++;
642 haplos[fat.two] = i;
643 }
644 homo += fat.isHomozygous();
645 }
646
647 if (mgeno)
648 {
649 if (haplos[mot.one] != i)
650 {
651 haplo++;
652 haplos[mot.one] = i;
653 }
654 if (haplos[mot.two] != i)
655 {
656 haplo++;
657 haplos[mot.two] = i;
658 }
659 homo += mot.isHomozygous();
660 }
661
662 if (diplo > 4 || haplo + homo > 4 || (haplo == 4 && too_many_genos))
663 {
664 printf("%s - Fam %s: ",
665 (const char *) markerNames[m],
666 (const char *) persons[i]->famid);
667 if (persons[i]->father->markers[m].isKnown())
668 printf("Father %s [%s/%s] has children [",
669 (const char *) persons[i]->father->pid,
670 (const char *) info->GetAlleleLabel(fat.one),
671 (const char *) info->GetAlleleLabel(fat.two));
672 else if (persons[i]->mother->markers[m].isKnown())
673 printf("Mother %s [%s/%s] has children [",
674 (const char *) persons[i]->mother->pid,
675 (const char *) info->GetAlleleLabel(mot.one),
676 (const char *) info->GetAlleleLabel(mot.two));
677 else
678 printf("Couple %s * %s has children [",
679 (const char *) persons[i]->mother->pid,
680 (const char *) persons[i]->father->pid);
681
682 for (int j = 0; j < persons[i]->sibCount; j++)
683 printf("%s%s/%s", j == 0 ? "" : " ",
684 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
685 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
686 printf("]\n");
687
688 fail = true;
689 failedFamilies[f] = true;
690 }
691 }
692
693 for (int f = 0; f < familyCount; f++)
694 if (!failedFamilies[f] &&
695 (families[f]->count > families[f]->founders + 1) &&
696 !families[f]->isNuclear())
697 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
698 }
699
700 if (fail)
701 printf("\nMendelian inheritance errors detected\n");
702
703 return fail;
704}
705
706bool Pedigree::SexLinkedCheck()
707{
708 bool fail = false;
709
710 // Keep track of what families fail the basic inheritance check,
711 // so that we can run later run genotype elimination check on the remainder
712 IntArray failedFamilies(familyCount);
713
714 // For each marker ...
715 for (int m = 0; m < markerCount; m++)
716 {
717 MarkerInfo * info = GetMarkerInfo(m);
718
719 failedFamilies.Zero();
720
721 // Check for homozygous males
722 for (int f = 0; f < familyCount; f++)
723 for (int i = families[f]->first; i <= families[f]->last; i++)
724 if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() &&
725 !persons[i]->markers[m].isHomozygous())
726 {
727 printf("%s - Fam %s: Male %s has two X alleles [%s/%s]\n",
728 (const char *) markerNames[m],
729 (const char *) persons[i]->famid, (const char *) persons[i]->pid,
730 (const char *) info->GetAlleleLabel(persons[i]->markers[m].one),
731 (const char *) info->GetAlleleLabel(persons[i]->markers[m].two));
732
733 // Wipe this genotype so we don't get cascading errors below
734 persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0;
735
736 fail = true;
737 failedFamilies[f] = true;
738 }
739
740 // Check full sibships for errors
741 // TODO -- We could do better by grouping male half-sibs
742 for (int f = 0; f < familyCount; f++)
743 for (int i = families[f]->first; i <= families[f]->last; i++)
744 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
745 {
746 // This loop runs once per sibship
747 Alleles fat = persons[i]->father->markers[m];
748 Alleles mot = persons[i]->mother->markers[m];
749
750 bool fgeno = fat.isKnown();
751 bool mgeno = mot.isKnown();
752
753 Alleles inferred_mother = mot;
754 Alleles first_sister;
755 Alleles inferred_father;
756
757 bool mother_ok = true;
758
759 int sisters = 0;
760
761 for (int j = 0; j < persons[i]->sibCount; j++)
762 if (persons[i]->sibs[j]->isGenotyped(m))
763 {
764 Alleles geno = persons[i]->sibs[j]->markers[m];
765
766 bool fat1 = fat.hasAllele(geno.one);
767 bool fat2 = fat.hasAllele(geno.two);
768 bool mot1 = mot.hasAllele(geno.one);
769 bool mot2 = mot.hasAllele(geno.two);
770
771 int sex = persons[i]->sibs[j]->sex;
772
773 if (sex == SEX_MALE)
774 {
775 if (mgeno && !mot1)
776 {
777 printf("%s - Fam %s: Child %s [%s/Y] has mother [%s/%s]\n",
778 (const char *) markerNames[m],
779 (const char *) persons[i]->famid,
780 (const char *) persons[i]->sibs[j]->pid,
781 (const char *) info->GetAlleleLabel(geno.one),
782 (const char *) info->GetAlleleLabel(mot.one),
783 (const char *) info->GetAlleleLabel(mot.two));
784 fail = true;
785 failedFamilies[f] = true;
786 }
787 else
788 mother_ok &= inferred_mother.AddAllele(geno.one);
789 }
790 if (sex == SEX_FEMALE)
791 {
792 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
793 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
794 {
795 printf("%s - Fam %s: Child %s [%s/%s] has ",
796 (const char *) markerNames[m],
797 (const char *) persons[i]->famid,
798 (const char *) persons[i]->sibs[j]->pid,
799 (const char *) info->GetAlleleLabel(geno.one),
800 (const char *) info->GetAlleleLabel(geno.two));
801
802 if (!fgeno)
803 printf("mother [%s/%s]\n",
804 (const char *) info->GetAlleleLabel(mot.one),
805 (const char *) info->GetAlleleLabel(mot.two));
806 else if (!mgeno)
807 printf("father [%s/Y]\n",
808 (const char *) info->GetAlleleLabel(fat.one));
809 else
810 printf("parents [%s/Y]*[%s/%s]\n",
811 (const char *) info->GetAlleleLabel(fat.one),
812 (const char *) info->GetAlleleLabel(mot.one),
813 (const char *) info->GetAlleleLabel(mot.two));
814
815 fail = true;
816 failedFamilies[f] = true;
817 }
818 else
819 {
820 if (!sisters++)
821 inferred_father = first_sister = geno;
822 else if (first_sister != geno)
823 {
824 inferred_father.Intersect(geno);
825
826 mother_ok &= inferred_mother.AddAllele(
827 geno.otherAllele(inferred_father.one));
828 mother_ok &= inferred_mother.AddAllele(
829 first_sister.otherAllele(inferred_father.one));
830 }
831
832 if (!fgeno && (mot1 ^ mot2))
833 inferred_father.Intersect(mot1 ? geno.two : geno.one);
834
835 if (!mgeno && (fat1 ^ fat2))
836 mother_ok &= inferred_mother.AddAllele(fat1 ? geno.two : geno.one);
837 }
838 }
839 }
840
841 if (!mother_ok || (sisters && !inferred_father.isKnown()))
842 {
843 printf("%s - Fam %s: ",
844 (const char *) markerNames[m],
845 (const char *) persons[i]->famid);
846 if (fgeno)
847 printf("Father %s [%s/Y] has children [",
848 (const char *) persons[i]->father->pid,
849 (const char *) info->GetAlleleLabel(fat.one));
850 else if (mgeno)
851 printf("Mother %s [%s/%s] has children [",
852 (const char *) persons[i]->mother->pid,
853 (const char *) info->GetAlleleLabel(mot.one),
854 (const char *) info->GetAlleleLabel(mot.two));
855 else
856 printf("Couple %s * %s has children [",
857 (const char *) persons[i]->mother->pid,
858 (const char *) persons[i]->father->pid);
859
860 for (int j = 0; j < persons[i]->sibCount; j++)
861 printf(
862 persons[i]->sibs[j]->sex == SEX_MALE ? "%s%s/Y" : "%s%s/%s",
863 j == 0 ? "" : " ",
864 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
865 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
866 printf("]\n");
867 fail = true;
868 failedFamilies[f] = true;
869 }
870 }
871
872 for (int f = 0; f < familyCount; f++)
873 if (!failedFamilies[f] &&
874 (families[f]->count > families[f]->founders + 1) &&
875 !families[f]->isNuclear())
876 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
877 }
878
879 if (fail)
880 printf("\nMendelian inheritance errors detected\n");
881
882 return fail;
883}
884
885void Pedigree::ExtractFamily(int id, Pedigree & single_fam_ped)
886{
887 for (int i = families[id]->first; i <= families[id]->last; i++)
888 single_fam_ped.Add(*persons[i]);
889
890 single_fam_ped.Sort();
891}
892
893void Pedigree::ExtractOnAffection(int a, Pedigree & new_ped, int target_status)
894{
895 for (int i = 0; i < count; i++)
896 if (persons[i]->affections[a] == target_status)
897 new_ped.Add(*persons[i]);
898 else
899 {
900 Person blank_person;
901 blank_person.CopyIDs(*persons[i]);
902 new_ped.Add(blank_person);
903 }
904
905 new_ped.Sort();
906}
907
908void Pedigree::Filter(IntArray & filter)
909{
910 if (filter.Length() != count)
911 error("Pedigree:Size of pedigree filter doesn't match number of persons in pedigree");
912
913 for (int i = 0; i < count; i++)
914 if (filter[i] == 1)
915 {
916 persons[i]->WipePhenotypes();
917 persons[i]->filter = true;
918 }
919}
920
921void Pedigree::AddPerson(const char * famid, const char * pid,
922 const char * fatid, const char * motid,
923 int sex, bool delay_sort)
924{
925 if (count == size) Grow();
926
927 persons[count] = new Person;
928
929 persons[count]->famid = famid;
930 persons[count]->pid = pid;
931 persons[count]->fatid = fatid;
932 persons[count]->motid = motid;
933 persons[count]->sex = sex;
934
935 count++;
936
937 if (!delay_sort) Sort();
938}
939
940void Pedigree::ShowMemoryInfo()
941{
942 unsigned int bytes = 0;
943
944 for (int i = 0; i < count; i++)
945 bytes += persons[i]->famid.BufferSize() + persons[i]->pid.BufferSize() +
946 persons[i]->fatid.BufferSize() + persons[i]->motid.BufferSize();
947
948 bytes += count * (markerCount * sizeof(Alleles) + traitCount * sizeof(double) +
949 covariateCount * sizeof(double) + affectionCount * sizeof(char) +
950 sizeof(Person));
951
952 printf(" %40s %s\n", "Pedigree file ...", (const char *) MemoryInfo(bytes));
953}
954
955