Java solution to Codility GenomicRangeQuery problem (Lesson 5 – Prefix Sums) which scored 100%. The problem is to find the minimal nucleotide from a range of sequence DNA. The strategy is to:
- count occurence of each nucleotide in each row
- calculate running impact factor sum at each row
- calculate minimum impact factor by doing simple subtraction at the indices
References
Codility Genomic Range Query – Rafal’s Blog (Java)
Solution to Genomic-Range – Code Says (Python)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69 package com.codility.lesson05.prefixsums;
public class GenomicRangeQuery {
//main idea: keep running occurrence count of each nucleotide (A/C/G/T) at every position in S
public int[] solution(String S, int[] P, int[] Q) {
int [] answers = new int[P.length];
int stringLength = S.length();
int [][] occurrences = new int [stringLength][4];
//step 1 - for each row, count occurrences of each nucleotide (can only have 1 occurrence / row)
//e.g. if S=CAGCCTA array will be
//{
// {0,1,0,0} C
// {1,0,0,0} A
// {0,0,1,0} G
// {0,1,0,0} C
// {0,1,0,0} C
// {0,0,0,1} T
// {1,0,0,0} A
// }
for(int i=0; i<occurrences.length; i++) {
char c = S.charAt(i);
if(c == 'A') occurrences[i][0] = 1;
else if(c == 'C') occurrences[i][1] = 1;
else if(c == 'G') occurrences[i][2] = 1;
else if(c == 'T') occurrences[i][3] = 1;
}
//step 2 - for each row (starting from 2nd row), add up occurrences of each nucleotide by adding
//occurrences from previous row to current row
//now have running sum of each nucleotide for any row
//e.g. if S=CAGCCTA array will be
//{
// {0,1,0,0} C
// {1,1,0,0} A
// {1,1,1,0} G
// {1,2,1,0} C
// {1,3,1,0} C
// {1,3,1,1} T
// {2,3,1,1} A
// }
for(int i=1; i<stringLength; i++) {
for(int j=0; j<4; j++) {
occurrences[i][j] += occurrences[i-1][j];
}
}
//check each slice for min by simple subtraction
for(int i=0; i<P.length; i++) {
int index1 = P[i];
int index2 = Q[i];
for(int j=0; j<4; j++) { int lowerIndexCount = 0; //when index1 not at beginning of String - need to get occurrences from just before //beginning of slice - to see if that nucleotide occurred within slice //e.g. if slice is (2, 4), need to check for occurrences of A, C, G, T from index 1 to 4 if(index1-1 >= 0) {
lowerIndexCount = occurrences[index1-1][j];
}
if(occurrences[index2][j] - lowerIndexCount > 0) {
answers[i] = j+1; //nucleotide value is 1 more than loop value (A=1, C=2, G=3, T=4)
//no need to keep checking since always checking from smallest impact factor
//as soon as find occurrence, that has to be minimum, cause subsequent nucleotides have
//larger impact factor
break;
}
}
}
return answers;
}
}
TestNG test cases for this problem which all passed:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40 package test.com.codility.lesson05.prefixsums;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import com.codility.lesson05.prefixsums.GenomicRangeQuery;
public class GenomicRangeQueryTests {
private GenomicRangeQuery solution;
@BeforeTest
public void setUp() {
solution = new GenomicRangeQuery();
}
@DataProvider(name = "test1")
public Object [][] createData1() {
return new Object [][] {
new Object [] { "C", new int [] { 0, 0, 0 }, new int [] { 0, 0, 0 }, new int [] { 2, 2, 2} },
new Object [] { "AA", new int [] { 0, 1, 0 }, new int [] { 0, 1, 1 }, new int [] { 1, 1, 1} },
new Object [] { "CC", new int [] { 0, 1, 0 }, new int [] { 0, 1, 1 }, new int [] { 2, 2, 2} },
new Object [] { "GG", new int [] { 0, 1, 0 }, new int [] { 0, 1, 1 }, new int [] { 3, 3, 3} },
new Object [] { "TT", new int [] { 0, 0, 0 }, new int [] { 1, 1, 1 }, new int [] { 4, 4, 4} },
new Object [] { "ATT", new int [] { 0, 0, 0 }, new int [] { 1, 1, 1 }, new int [] { 1, 1, 1} },
new Object [] { "CAGCCTA", new int [] { 2, 5, 0 }, new int [] { 4, 5, 6 }, new int [] { 2, 4, 1} },
new Object [] { "CAGTCAT", new int [] { 0, 1, 3 }, new int [] { 0, 5, 4 }, new int [] { 2, 1, 2} },
};
}
@Test(dataProvider = "test1")
public void verifySolution(String pS, int[] pP, int[] pQ, int [] pExpected) {
int [] actual = solution.solution(pS, pP, pQ);
Assert.assertEquals(actual.length, pExpected.length);
for(int i=0; i<actual.length; i++) {
Assert.assertEquals(actual[i], pExpected[i]);
}
}
}