GenomicRangeQuery – Codility – Solution

Java solution to Codility GenomicRangeQuery problem (Lesson 5 – Prefix Sums) which scored 100%. The problem is to find the minimal nucleotide from a range of sequence DNA. The strategy is to:

  • count occurence of each nucleotide in each row
  • calculate running impact factor sum at each row
  • calculate minimum impact factor by doing simple subtraction at the indices

References
Codility Genomic Range Query – Rafal’s Blog (Java)
Solution to Genomic-Range – Code Says (Python)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package com.codility.lesson05.prefixsums;

public class GenomicRangeQuery {
  //main idea: keep running occurrence count of each nucleotide (A/C/G/T) at every position in S
  public int[] solution(String S, int[] P, int[] Q) {
    int [] answers = new int[P.length];
    int stringLength = S.length();
   
    int [][] occurrences = new int [stringLength][4];
   
    //step 1 - for each row, count occurrences of each nucleotide (can only have 1 occurrence / row)
    //e.g. if S=CAGCCTA array will be
    //{
    //  {0,1,0,0}  C
    //  {1,0,0,0}  A
    //  {0,0,1,0}  G
    //  {0,1,0,0}  C
    //  {0,1,0,0}  C
    //  {0,0,0,1}  T
      //  {1,0,0,0}  A
    // }
    for(int i=0; i<occurrences.length; i++) {
      char c = S.charAt(i);
      if(c == 'A')      occurrences[i][0] = 1;
      else if(c == 'C') occurrences[i][1] = 1;
      else if(c == 'G') occurrences[i][2] = 1;
      else if(c == 'T') occurrences[i][3] = 1;
    }

    //step 2 - for each row (starting from 2nd row), add up occurrences of each nucleotide by adding
    //occurrences from previous row to current row
    //now have running sum of each nucleotide for any row
    //e.g. if S=CAGCCTA array will be
    //{
    //  {0,1,0,0}  C
    //  {1,1,0,0}  A
    //  {1,1,1,0}  G
    //  {1,2,1,0}  C
    //  {1,3,1,0}  C
    //  {1,3,1,1}  T
      //  {2,3,1,1}  A
    // }
    for(int i=1; i<stringLength; i++) {
      for(int j=0; j<4; j++) {
        occurrences[i][j] += occurrences[i-1][j];
      }
    }

    //check each slice for min by simple subtraction
    for(int i=0; i<P.length; i++) {
      int index1 = P[i];
      int index2 = Q[i];

      for(int j=0; j<4; j++) { int lowerIndexCount = 0; //when index1 not at beginning of String - need to get occurrences from just before //beginning of slice - to see if that nucleotide occurred within slice //e.g. if slice is (2, 4), need to check for occurrences of A, C, G, T from index 1 to 4 if(index1-1 >= 0) {
          lowerIndexCount = occurrences[index1-1][j];
        }
       
        if(occurrences[index2][j] - lowerIndexCount > 0) {
          answers[i] = j+1; //nucleotide value is 1 more than loop value (A=1, C=2, G=3, T=4)
          //no need to keep checking since always checking from smallest impact factor
          //as soon as find occurrence, that has to be minimum, cause subsequent nucleotides have
          //larger impact factor
          break;
        }
      }
    }
    return answers;
  }
}

TestNG test cases for this problem which all passed:


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
package test.com.codility.lesson05.prefixsums;

import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import com.codility.lesson05.prefixsums.GenomicRangeQuery;

public class GenomicRangeQueryTests {
  private GenomicRangeQuery solution;
 
  @BeforeTest
  public void setUp() {
    solution = new GenomicRangeQuery();
  }

  @DataProvider(name = "test1")
  public Object [][] createData1() {
    return new Object [][] {
      new Object [] {  "C", new int [] { 0, 0, 0 }, new int [] { 0, 0, 0 }, new int [] { 2, 2, 2} },
      new Object [] {  "AA", new int [] { 0, 1, 0 }, new int [] { 0, 1, 1 }, new int [] { 1, 1, 1} },
      new Object [] {  "CC", new int [] { 0, 1, 0 }, new int [] { 0, 1, 1 }, new int [] { 2, 2, 2} },
      new Object [] {  "GG", new int [] { 0, 1, 0 }, new int [] { 0, 1, 1 }, new int [] { 3, 3, 3} },
      new Object [] {  "TT", new int [] { 0, 0, 0 }, new int [] { 1, 1, 1 }, new int [] { 4, 4, 4} },
      new Object [] {  "ATT", new int [] { 0, 0, 0 }, new int [] { 1, 1, 1 }, new int [] { 1, 1, 1} },
      new Object [] {  "CAGCCTA", new int [] { 2, 5, 0 }, new int [] { 4, 5, 6 }, new int [] { 2, 4, 1} },
      new Object [] {  "CAGTCAT", new int [] { 0, 1, 3 }, new int [] { 0, 5, 4 }, new int [] { 2, 1, 2} },
    };
  }

  @Test(dataProvider = "test1")
  public void verifySolution(String pS, int[] pP, int[] pQ, int [] pExpected) {  
    int [] actual = solution.solution(pS, pP, pQ); 
    Assert.assertEquals(actual.length, pExpected.length);
    for(int i=0; i<actual.length; i++) { 
      Assert.assertEquals(actual[i], pExpected[i]);
    }
  }  
}