From 65b578ccdf01a9169b754702452d9e352a67dda4 Mon Sep 17 00:00:00 2001 From: Liam Pattinson Date: Sat, 14 Feb 2026 09:50:02 +0000 Subject: [PATCH 1/3] Change data structures lesson to floating point data --- episodes/04-unit-tests-best-practices.Rmd | 70 +-- episodes/06-floating-point-data.Rmd | 377 ++++++++++++++ episodes/06-testing-data-structures.Rmd | 471 ------------------ .../advanced/advanced_calculator.py | 0 .../advanced/test_advanced_calculator.py | 0 .../calculator.py | 0 .../data_structures.py | 0 .../06-floating-point-data/estimate_pi.py | 10 + .../scripts.py | 0 .../statistics/stats.py | 0 .../statistics/test_stats.py | 0 .../test_calculator.py | 0 .../test_data_structures.py | 6 +- .../test_estimate_pi.py | 15 + .../test_floating_point.py | 12 + .../files/07-fixtures/test_data_structures.py | 6 +- .../test_data_structures.py | 6 +- .../test_data_structures.py | 6 +- 18 files changed, 465 insertions(+), 514 deletions(-) create mode 100644 episodes/06-floating-point-data.Rmd delete mode 100644 episodes/06-testing-data-structures.Rmd rename learners/files/{06-data-structures => 06-floating-point-data}/advanced/advanced_calculator.py (100%) rename learners/files/{06-data-structures => 06-floating-point-data}/advanced/test_advanced_calculator.py (100%) rename learners/files/{06-data-structures => 06-floating-point-data}/calculator.py (100%) rename learners/files/{06-data-structures => 06-floating-point-data}/data_structures.py (100%) create mode 100644 learners/files/06-floating-point-data/estimate_pi.py rename learners/files/{06-data-structures => 06-floating-point-data}/scripts.py (100%) rename learners/files/{06-data-structures => 06-floating-point-data}/statistics/stats.py (100%) rename learners/files/{06-data-structures => 06-floating-point-data}/statistics/test_stats.py (100%) rename learners/files/{06-data-structures => 06-floating-point-data}/test_calculator.py (100%) rename learners/files/{06-data-structures => 
06-floating-point-data}/test_data_structures.py (96%) create mode 100644 learners/files/06-floating-point-data/test_estimate_pi.py create mode 100644 learners/files/06-floating-point-data/test_floating_point.py diff --git a/episodes/04-unit-tests-best-practices.Rmd b/episodes/04-unit-tests-best-practices.Rmd index 1cbe4af3..ba86e89c 100644 --- a/episodes/04-unit-tests-best-practices.Rmd +++ b/episodes/04-unit-tests-best-practices.Rmd @@ -4,7 +4,7 @@ teaching: 10 exercises: 2 --- -:::::::::::::::::::::::::::::::::::::: questions +:::::::::::::::::::::::::::::::::::::: questions - What to do about complex functions & tests? - What are some testing best practices for testing? @@ -40,7 +40,7 @@ def process_data(data: list, maximum_value: float): for i in range(len(data_negative_removed)): if data_negative_removed[i] <= maximum_value: data_maximum_removed.append(data_negative_removed[i]) - + # Calculate the mean mean = sum(data_maximum_removed) / len(data_maximum_removed) @@ -63,9 +63,17 @@ def test_process_data(): ``` -This test is very complex and hard to debug if it fails. Imagine if the calculation of the mean broke - the test would fail but it would not tell us what part of the function was broken, requiring us to +This test is hard to debug if it fails. Imagine if the calculation of the mean broke - the test would fail but it would not tell us what part of the function was broken, requiring us to check each function manually to find the bug. Not very efficient! +:::::::::::::::::::::::::::: callout + +Asserting that the standard deviation is equal to 16 decimal +places is also quite error prone. We'll see in a later lesson +how to improve this test. + +:::::::::::::::::::::::::::::::::::: + ## Unit Testing The process of unit testing is a fundamental part of software development. It is where you test individual units or components of a software instead of multiple things at once. 
@@ -156,10 +164,10 @@ This makes your tests easier to read and understand for both yourself and others def test_calculate_mean(): # Arrange data = [1, 2, 3, 4, 5] - + # Act mean = calculate_mean(data) - + # Assert assert mean == 3 ``` @@ -190,10 +198,10 @@ Here is an example of the TDD process: def test_calculate_mean(): # Arrange data = [1, 2, 3, 4, 5] - + # Act mean = calculate_mean(data) - + # Assert assert mean == 3.5 ``` @@ -244,7 +252,7 @@ Random seeds work by setting the initial state of the random number generator. This means that if you set the seed to the same value, you will get the same sequence of random numbers each time you run the function. -::::::::::::::::::::::::::::::::::::: challenge +::::::::::::::::::::::::::::::::::::: challenge ## Challenge: Write your own unit tests @@ -258,21 +266,21 @@ Take this complex function, break it down and write unit tests for it. import random def randomly_sample_and_filter_participants( - participants: list, - sample_size: int, - min_age: int, - max_age: int, - min_height: int, + participants: list, + sample_size: int, + min_age: int, + max_age: int, + min_height: int, max_height: int ): """Participants is a list of tuples, containing the age and height of each participant participants = [ - {age: 25, height: 180}, - {age: 30, height: 170}, - {age: 35, height: 160}, + {age: 25, height: 180}, + {age: 30, height: 170}, + {age: 35, height: 160}, ] """ - + # Get the indexes to sample indexes = random.sample(range(len(participants)), sample_size) @@ -280,13 +288,13 @@ def randomly_sample_and_filter_participants( sampled_participants = [] for i in indexes: sampled_participants.append(participants[i]) - + # Remove participants that are outside the age range sampled_participants_age_filtered = [] for participant in sampled_participants: if participant['age'] >= min_age and participant['age'] <= max_age: sampled_participants_age_filtered.append(participant) - + # Remove participants that are outside the height range 
sampled_participants_height_filtered = [] for participant in sampled_participants_age_filtered: @@ -299,7 +307,7 @@ def randomly_sample_and_filter_participants( - Create a new file called `test_stats.py` in the `statistics` directory - Write unit tests for the `randomly_sample_and_filter_participants` function in `test_stats.py` -:::::::::::::::::::::::: solution +:::::::::::::::::::::::: solution The function can be broken down into smaller functions, each of which can be tested separately: @@ -307,7 +315,7 @@ The function can be broken down into smaller functions, each of which can be tes import random def sample_participants( - participants: list, + participants: list, sample_size: int ): indexes = random.sample(range(len(participants)), sample_size) @@ -317,8 +325,8 @@ def sample_participants( return sampled_participants def filter_participants_by_age( - participants: list, - min_age: int, + participants: list, + min_age: int, max_age: int ): filtered_participants = [] @@ -328,8 +336,8 @@ def filter_participants_by_age( return filtered_participants def filter_participants_by_height( - participants: list, - min_height: int, + participants: list, + min_height: int, max_height: int ): filtered_participants = [] @@ -339,11 +347,11 @@ def filter_participants_by_height( return filtered_participants def randomly_sample_and_filter_participants( - participants: list, - sample_size: int, - min_age: int, - max_age: int, - min_height: int, + participants: list, + sample_size: int, + min_age: int, + max_age: int, + min_height: int, max_height: int ): sampled_participants = sample_participants(participants, sample_size) @@ -447,7 +455,7 @@ When time is limited, it's often better to only write tests for the most critica You should discuss with your team how much of the code you think should be tested, and what the most critical parts of the code are in order to prioritize your time. 
-::::::::::::::::::::::::::::::::::::: keypoints +::::::::::::::::::::::::::::::::::::: keypoints - Complex functions can be broken down into smaller, testable units. - Testing each unit separately is called unit testing. diff --git a/episodes/06-floating-point-data.Rmd b/episodes/06-floating-point-data.Rmd new file mode 100644 index 00000000..57d1454c --- /dev/null +++ b/episodes/06-floating-point-data.Rmd @@ -0,0 +1,377 @@ +--- +title: 'Floating Point Data' +teaching: 10 +exercises: 5 +--- + +:::::::::::::::::::::::::::::::::::::: questions + +- What are the best practices when working with floating point data? +- How do you compare objects in libraries like `pandas` and `numpy`? + +:::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::: objectives + +- Learn how to test floating point data with tolerances. +- Learn how to compare objects in libraries like `pandas` and `numpy`. + +:::::::::::::::::::::::::::::::::::::::::::::::: + +## Floating Point Data + +Real numbers are encountered very frequently in research, but it's quite likely +that they won't be 'nice' numbers like 2.0 or 0.0. Instead, the outcome of our +code might be something like `2.34958124890e-31`, and we may only be confident +in that answer to a certain precision. + +Computers typically represent real numbers using a 'floating point' representation, +which truncates their precision to a certain number of decimal places. Floating point +arithmetic errors can cause a significant amount of noise in the last few decimal +places. This can be affected by: + +- Choice of algorithm. +- Precise order of operations. +- Order in which parallel processes finish. +- Inherent randomness in the calculation. + +We could therefore test our code using `assert result == 2.34958124890e-31`, +but it's possible that this test could erroneously fail in future for reasons +outside our control. This lesson will teach best practices for handling this +type of data. 
+ +Libraries like `numpy` and `pandas` are commonly used to interact with large quantities +of floating point numbers, and they provide special functions to assist with testing. + +### Relative and Absolute Tolerances + +Rather than testing that a floating point number is exactly equal to another, +it is preferable to test that it is within a certain tolerance. In most cases, +it is best to use a _relative_ tolerance: + +```python +from math import fabs + +def test_float_rtol(): + actual = my_function() + expected = 7.31926e12 # Reference solution + rtol = 1e-3 + # Use fabs to ensure a positive result! + assert fabs((actual - expected) / expected) < rtol +``` + +In some situations, such as testing that a number is close to zero without caring +about exactly how large it is, it is preferable to test within an _absolute_ +tolerance: + +```python +from math import fabs + +def test_float_atol(): + actual = my_function() + expected = 0.0 # Reference solution + atol = 1e-5 + # Use fabs to ensure a positive result! + assert fabs(actual - expected) < atol +``` + + +Let's practice with a function that estimates the value of pi (very +inefficiently!). + +::::::::::::::::::::::::::::::::::::: challenge + +## Testing with tolerances + +- Write this function to a file `estimate_pi.py`: + +```python +import random + +def estimate_pi(iterations): + num_inside = 0 + for _ in range(iterations): + x = random.random() + y = random.random() + if x**2 + y**2 < 1: + num_inside += 1 + return 4 * num_inside / iterations +``` + +- Add a file `test_estimate_pi.py`, and include a test for this function using + both absolute and relative tolerances. +- Find an appropriate number of iterations so that the test finishes quickly, + but keep in mind that both `atol` and `rtol` will need to be modified accordingly! 
+ +:::::::::::::::::::::::: solution + +```python +import random +from math import fabs + +from estimate_pi import estimate_pi + +def test_estimate_pi(): + random.seed(0) + expected = 3.141592654 + actual = estimate_pi(iterations=10000) + # Test absolute tolerance + atol = 1e-2 + assert fabs(actual - expected) < atol + # Test relative tolerance + rtol = 5e-3 + assert fabs((actual - expected) / expected) < rtol +``` + +In this case the absolute and relative tolerances should be similar, as +the expected result is close in magnitude to 1.0, but in principle they could +be very different! + +::::::::::::::::::::::::::::::::: + +The standard library function `math.isclose` can be used to simplify these checks: + +```python +assert math.isclose(a, b, rel_tol=rtol, abs_tol=atol) +``` + +Both `rel_tol` and `abs_tol` may be provided, and it will return `True` +if either of the conditions is satisfied. + +::::::::::::::::::::::::::::::::::::: challenge + +## Using `math.isclose` + +- Adapt the test you wrote in the previous challenge to make use of + the `math.isclose` function. + +:::::::::::::::::::::::: solution + +```python +import random +import math + +from estimate_pi import estimate_pi + +def test_estimate_pi(): + random.seed(0) + expected = 3.141592654 + actual = estimate_pi(iterations=10000) + atol = 1e-2 + rtol = 5e-3 + assert math.isclose(actual, expected, abs_tol=atol, rel_tol=rtol) +``` + +::::::::::::::::::::::::::::::::: + +### NumPy + +NumPy is a common library used in research. Instead of the usual `assert a == +b`, NumPy has its own testing functions that are more suitable for comparing +NumPy arrays. These functions are the ones you are most likely to use: + +- `numpy.testing.assert_array_equal` is used to compare two NumPy arrays or array-like objects (such as lists, tuples, etc.). +- `numpy.testing.assert_allclose` is used to compare two NumPy arrays or array-like objects with a tolerance for floating point numbers. 
+ +These may also be used on individual floating point numbers if you choose. + +Here are some examples of how to use these functions: + +```python + +def test_numpy_arrays(): + """Test that numpy arrays are equal""" + # Create two numpy arrays + array1 = np.array([1, 2, 3]) + array2 = np.array([1, 2, 3]) + # Check that the arrays are equal + np.testing.assert_array_equal(array1, array2) + +# Note that np.testing.assert_array_equal even works with multidimensional numpy arrays! + +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays + array1 = np.array([[1, 2], [3, 4]]) + array2 = np.array([[1, 2], [3, 4]]) + # Check that the nested arrays are equal + np.testing.assert_array_equal(array1, array2) + +def test_numpy_arrays_with_tolerance(): + """Test that numpy arrays are equal with tolerance""" + # Create two numpy arrays + array1 = np.array([1.0, 2.0, 3.0]) + array2 = np.array([1.00009, 2.0005, 3.0001]) + # Check that the arrays are equal with tolerance + np.testing.assert_allclose(array1, array2, atol=1e-3) +``` + +::::::::::::::::::::::::::::::::::::: callout + +### Data structures with numpy arrays + +When you have data structures that contain numpy arrays, such as lists or dictionaries, you cannot use `==` to compare them. +Instead, you can use `numpy.testing.assert_equal` to compare the data structures. + +```python +def test_dictionaries_with_numpy_arrays(): + """Test that dictionaries with numpy arrays are equal""" + # Create two dictionaries with numpy arrays + dict1 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} + dict2 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} + # Check that the dictionaries are equal + np.testing.assert_equal(dict1, dict2) +``` + +:::::::::::::::::::::::::::::::::::::::::::::::: + + +### pandas + +Pandas is another common library used in research for storing and manipulating datasets. 
+Pandas has its own testing functions that are more suitable for comparing Pandas objects. +These two functions are the ones you are most likely to use: +- `pandas.testing.assert_frame_equal` is used to compare two Pandas DataFrames. +- `pandas.testing.assert_series_equal` is used to compare two Pandas Series. + + +Here are some examples of how to use these functions: + +```python + +def test_pandas_dataframes(): + """Test that pandas DataFrames are equal""" + # Create two pandas DataFrames + df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + # Check that the DataFrames are equal + pd.testing.assert_frame_equal(df1, df2) + +def test_pandas_series(): + """Test that pandas Series are equal""" + # Create two pandas Series + s1 = pd.Series([1, 2, 3]) + s2 = pd.Series([1, 2, 3]) + # Check that the Series are equal + pd.testing.assert_series_equal(s1, s2) +``` + +There is no separate equivalent to `np.testing.assert_allclose` in Pandas, although `assert_frame_equal` and `assert_series_equal` accept `rtol` and `atol` arguments. If you need to compare DataFrames +or Series containing floating point data, you can also use the `np.testing` functions directly +on the Pandas objects. + + +::::::::::::::::::::::::::::::::::::: challenge + +### Checking if NumPy arrays are equal + +In `statistics/stats.py` add this function to calculate the cumulative sum of a NumPy array: + +```python +import numpy as np + +def calculate_cumulative_sum(array: np.ndarray) -> np.ndarray: + """Calculate the cumulative sum of a numpy array""" + + # don't use the built-in numpy function + result = np.zeros(array.shape) + result[0] = array[0] + for i in range(1, len(array)): + result[i] = result[i-1] + array[i] + + return result +``` + +Then write a test for this function by comparing NumPy arrays. 
+ +:::::::::::::::::::::::: solution + +```python +import numpy as np +from stats import calculate_cumulative_sum + +def test_calculate_cumulative_sum(): + """Test calculate_cumulative_sum function""" + array = np.array([1, 2, 3, 4, 5]) + expected_result = np.array([1, 3, 6, 10, 15]) + np.testing.assert_array_equal(calculate_cumulative_sum(array), expected_result) +``` + +::::::::::::::::::::::::::::::::: + +### Checking if Pandas DataFrames are equal + +In `statistics/stats.py` add this function to calculate the average score of each player in a Pandas DataFrame: + +```python +import pandas as pd + +def calculate_player_average_scores(df: pd.DataFrame) -> pd.DataFrame: + """Calculate the average score of each player in a pandas DataFrame. + + Example input: + | | player | score_1 | score_2 | + |---|---------|---------|---------| + | 0 | Alice | 1 | 2 | + | 1 | Bob | 3 | 4 | + + Example output: + | | player | score_1 | score_2 | average_score | + |---|---------|---------|---------|---------------| + | 0 | Alice | 1 | 2 | 1.5 | + | 1 | Bob | 3 | 4 | 3.5 | + """ + + df["average_score"] = df[["score_1", "score_2"]].mean(axis=1) + + return df +``` + +Then write a test for this function by comparing Pandas DataFrames. 
+ +Hint: You can create a dataframe like this: + +```python +df = pd.DataFrame({ + "player": ["Alice", "Bob"], + "score_1": [1, 3], + "score_2": [2, 4] +}) +``` + +:::::::::::::::::::::::: solution + +```python +import pandas as pd +from stats import calculate_player_average_scores + +def test_calculate_player_average_scores(): + """Test calculate_player_average_scores function""" + df = pd.DataFrame({ + "player": ["Alice", "Bob"], + "score_1": [1, 3], + "score_2": [2, 4] + }) + expected_result = pd.DataFrame({ + "player": ["Alice", "Bob"], + "score_1": [1, 3], + "score_2": [2, 4], + "average_score": [1.5, 3.5] + }) + pd.testing.assert_frame_equal(calculate_player_average_scores(df), expected_result) +``` + +::::::::::::::::::::::::::::::::: + + +:::::::::::::::::::::::::::::::::::::::::::::::: + + +::::::::::::::::::::::::::::::::::::: keypoints + +- When comparing floating point data, you should use relative/absolute tolerances instead of testing for equality. +- Numpy arrays cannot be compared using the `==` operator. Instead, use `numpy.testing.assert_array_equal` and `numpy.testing.assert_allclose`. +- Pandas DataFrames and Series should be compared using `pandas.testing.assert_frame_equal` and `pandas.testing.assert_series_equal`. + +:::::::::::::::::::::::::::::::::::::::::::::::: + diff --git a/episodes/06-testing-data-structures.Rmd b/episodes/06-testing-data-structures.Rmd deleted file mode 100644 index 8b82784a..00000000 --- a/episodes/06-testing-data-structures.Rmd +++ /dev/null @@ -1,471 +0,0 @@ ---- -title: 'Testing Data Structures' -teaching: 10 -exercises: 2 ---- - -:::::::::::::::::::::::::::::::::::::: questions - -- How do you compare data structures such as lists and dictionaries? -- How do you compare objects in libraries like `pandas` and `numpy`? - -:::::::::::::::::::::::::::::::::::::::::::::::: - -::::::::::::::::::::::::::::::::::::: objectives - -- Learn how to compare lists and dictionaries in Python. 
-- Learn how to compare objects in libraries like `pandas` and `numpy`. - -:::::::::::::::::::::::::::::::::::::::::::::::: - -## Data structures - -When writing tests for your code, you often need to compare data structures such as lists, dictionaries, and objects from libraries like `numpy` and `pandas`. -Here we will go over some of the more common data structures that you may use in research and how to test them. - -### Lists - -Python lists can be tested using the usual `==` operator as we do for numbers. - -```python - -def test_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert list1 == list2 - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert list3 != list4 - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert list5 != list6 - -``` - -Note that the order of elements in the list matters. If you want to check that two lists contain the same elements but in different order, you can use the `sorted` function. - -```python -def test_sorted_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert sorted(list1) == sorted(list2) - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert sorted(list3) == sorted(list4) - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert sorted(list5) != sorted(list6) - -``` - -### Dictionaries - -Python dictionaries can also be tested using the `==` operator, however, the order of the keys does not matter. -This means that if you have two dictionaries with the same keys and values, but in different order, they will still be considered equal. - -The reason for this is that dictionaries are unordered collections of key-value pairs. 
-(If you need to preserve the order of keys, you can use the `collections.OrderedDict` class.) - -```python -def test_dictionaries_equal(): - """Test that dictionaries are equal""" - # Create two dictionaries - dict1 = {"a": 1, "b": 2, "c": 3} - dict2 = {"a": 1, "b": 2, "c": 3} - # Check that the dictionaries are equal - assert dict1 == dict2 - - # Create two dictionaries, different order - dict3 = {"a": 1, "b": 2, "c": 3} - dict4 = {"c": 3, "b": 2, "a": 1} - assert dict3 == dict4 - - # Create two different dictionaries - dict5 = {"a": 1, "b": 2, "c": 3} - dict6 = {"a": 1, "b": 2, "c": 4} - # Check that the dictionaries are not equal - assert dict5 != dict6 -``` - -### numpy - -Numpy is a common library used in research. -Instead of the usual `assert a == b`, numpy has its own testing functions that are more suitable for comparing numpy arrays. -These two functions are the ones you are most likely to use: -- `numpy.testing.assert_array_equal` is used to compare two numpy arrays. -- `numpy.testing.assert_allclose` is used to compare two numpy arrays with a tolerance for floating point numbers. -- `numpy.testing.assert_equal` is used to compare two objects such as lists or dictionaries that contain numpy arrays. - -Here are some examples of how to use these functions: - -```python - -def test_numpy_arrays(): - """Test that numpy arrays are equal""" - # Create two numpy arrays - array1 = np.array([1, 2, 3]) - array2 = np.array([1, 2, 3]) - # Check that the arrays are equal - np.testing.assert_array_equal(array1, array2) - -# Note that np.testing.assert_array_equal even works with nested numpy arrays! 
- -def test_nested_numpy_arrays(): - """Test that nested numpy arrays are equal""" - # Create two nested numpy arrays - array1 = np.array([[1, 2], [3, 4]]) - array2 = np.array([[1, 2], [3, 4]]) - # Check that the nested arrays are equal - np.testing.assert_array_equal(array1, array2) - -def test_numpy_arrays_with_tolerance(): - """Test that numpy arrays are equal with tolerance""" - # Create two numpy arrays - array1 = np.array([1.0, 2.0, 3.0]) - array2 = np.array([1.00009, 2.0005, 3.0001]) - # Check that the arrays are equal with tolerance - np.testing.assert_allclose(array1, array2, atol=1e-3) -``` - -::::::::::::::::::::::::::::::::::::: callout - -### Data structures with numpy arrays - -When you have data structures that contain numpy arrays, such as lists or dictionaries, you cannot use `==` to compare them. -Instead, you can use `numpy.testing.assert_equal` to compare the data structures. - -```python -def test_dictionaries_with_numpy_arrays(): - """Test that dictionaries with numpy arrays are equal""" - # Create two dictionaries with numpy arrays - dict1 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict2 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - # Check that the dictionaries are equal - np.testing.assert_equal(dict1, dict2) -``` - -:::::::::::::::::::::::::::::::::::::::::::::::: - - -### pandas - -Pandas is another common library used in research for storing and manipulating datasets. -Pandas has its own testing functions that are more suitable for comparing pandas objects. -These two functions are the ones you are most likely to use: -- `pandas.testing.assert_frame_equal` is used to compare two pandas DataFrames. -- `pandas.testing.assert_series_equal` is used to compare two pandas Series. 
- - -Here are some examples of how to use these functions: - -```python - -def test_pandas_dataframes(): - """Test that pandas DataFrames are equal""" - # Create two pandas DataFrames - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Check that the DataFrames are equal - pd.testing.assert_frame_equal(df1, df2) - -def test_pandas_series(): - """Test that pandas Series are equal""" - # Create two pandas Series - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([1, 2, 3]) - # Check that the Series are equal - pd.testing.assert_series_equal(s1, s2) -``` - - -::::::::::::::::::::::::::::::::::::: challenge - -## Challenge : Comparing Data Structures - -### Checking if lists are equal - -In `statistics/stats.py` add this function to remove anomalies from a list: - -```python -def remove_anomalies(data: list, maximum_value: float, minimum_value: float) -> list: - """Remove anomalies from a list of numbers""" - - result = [] - - for i in data: - if i <= maximum_value and i >= minimum_value: - result.append(i) - - return result -``` - -Then write a test for this function by comparing lists. 
- -:::::::::::::::::::::::: solution - -```python -from stats import remove_anomalies - -def test_remove_anomalies(): - """Test remove_anomalies function""" - data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - maximum_value = 5 - minimum_value = 2 - expected_result = [2, 3, 4, 5] - assert remove_anomalies(data, maximum_value, minimum_value) == expected_result -``` - -::::::::::::::::::::::::::::::::: - -### Checking if dictionaries are equal - -In `statistics/stats.py` add this function to calculate the frequency of each element in a list: - -```python -def calculate_frequency(data: list) -> dict: - """Calculate the frequency of each element in a list""" - - frequencies = {} - - # Iterate over each value in the list - for value in data: - # If the value is already in the dictionary, increment the count - if value in frequencies: - frequencies[value] += 1 - # Otherwise, add the value to the dictionary with a count of 1 - else: - frequencies[value] = 1 - - return frequencies -``` - -Then write a test for this function by comparing dictionaries. - -:::::::::::::::::::::::: solution - -```python -from stats import calculate_frequency - -def test_calculate_frequency(): - """Test calculate_frequency function""" - data = [1, 2, 3, 1, 2, 1, 1, 3, 3, 3] - expected_result = {1: 4, 2: 2, 3: 4} - assert calculate_frequency(data) == expected_result -``` - -::::::::::::::::::::::::::::::::: - -### Checking if numpy arrays are equal - -In `statistics/stats.py` add this function to calculate the cumulative sum of a numpy array: - -```python -import numpy as np - -def calculate_cumulative_sum(array: np.ndarray) -> np.ndarray: - """Calculate the cumulative sum of a numpy array""" - - # don't use the built-in numpy function - result = np.zeros(array.shape) - result[0] = array[0] - for i in range(1, len(array)): - result[i] = result[i-1] + array[i] - - return result -``` - -Then write a test for this function by comparing numpy arrays. 
- -:::::::::::::::::::::::: solution - -```python -import numpy as np -from stats import calculate_cumulative_sum - -def test_calculate_cumulative_sum(): - """Test calculate_cumulative_sum function""" - array = np.array([1, 2, 3, 4, 5]) - expected_result = np.array([1, 3, 6, 10, 15]) - np.testing.assert_array_equal(calculate_cumulative_sum(array), expected_result) -``` - -::::::::::::::::::::::::::::::::: - -### Checking if data structures with numpy arrays are equal - -In `statistics/stats.py` add this function to calculate the total score of each player in a dictionary: - -```python - -def calculate_player_total_scores(participants: dict): - """Calculate the total score of each player in a dictionary. - - Example input: - { - "Alice": { - "scores": np.array([1, 2, 3]) - }, - "Bob": { - "scores": np.array([4, 5, 6]) - }, - "Charlie": { - "scores": np.array([7, 8, 9]) - }, - } - - Example output: - { - "Alice": { - "scores": np.array([1, 2, 3]), - "total_score": 6 - }, - "Bob": { - "scores": np.array([4, 5, 6]), - "total_score": 15 - }, - "Charlie": { - "scores": np.array([7, 8, 9]), - "total_score": 24 - }, - } - """" - - for player in participants: - participants[player]["total_score"] = np.sum(participants[player]["scores"]) - - return participants -``` - -Then write a test for this function by comparing dictionaries with numpy arrays. 
- -:::::::::::::::::::::::: solution - -```python -import numpy as np -from stats import calculate_player_total_scores - -def test_calculate_player_total_scores(): - """Test calculate_player_total_scores function""" - participants = { - "Alice": { - "scores": np.array([1, 2, 3]) - }, - "Bob": { - "scores": np.array([4, 5, 6]) - }, - "Charlie": { - "scores": np.array([7, 8, 9]) - }, - } - expected_result = { - "Alice": { - "scores": np.array([1, 2, 3]), - "total_score": 6 - }, - "Bob": { - "scores": np.array([4, 5, 6]), - "total_score": 15 - }, - "Charlie": { - "scores": np.array([7, 8, 9]), - "total_score": 24 - }, - } - np.testing.assert_equal(calculate_player_total_scores(participants), expected_result) -``` - -::::::::::::::::::::::::::::::::: - -### Checking if pandas DataFrames are equal - -In `statistics/stats.py` add this function to calculate the average score of each player in a pandas DataFrame: - -```python -import pandas as pd - -def calculate_player_average_scores(df: pd.DataFrame) -> pd.DataFrame: - """Calculate the average score of each player in a pandas DataFrame. - - Example input: - | | player | score_1 | score_2 | - |---|---------|---------|---------| - | 0 | Alice | 1 | 2 | - | 1 | Bob | 3 | 4 | - - Example output: - | | player | score_1 | score_2 | average_score | - |---|---------|---------|---------|---------------| - | 0 | Alice | 1 | 2 | 1.5 | - | 1 | Bob | 3 | 4 | 3.5 | - """ - - df["average_score"] = df[["score_1", "score_2"]].mean(axis=1) - - return df -``` - -Then write a test for this function by comparing pandas DataFrames. 
- -Hint: You can create a dataframe like this: - -```python -df = pd.DataFrame({ - "player": ["Alice", "Bob"], - "score_1": [1, 3], - "score_2": [2, 4] -}) -``` - -:::::::::::::::::::::::: solution - -```python -import pandas as pd -from stats import calculate_player_average_scores - -def test_calculate_player_average_scores(): - """Test calculate_player_average_scores function""" - df = pd.DataFrame({ - "player": ["Alice", "Bob"], - "score_1": [1, 3], - "score_2": [2, 4] - }) - expected_result = pd.DataFrame({ - "player": ["Alice", "Bob"], - "score_1": [1, 3], - "score_2": [2, 4], - "average_score": [1.5, 3.5] - }) - pd.testing.assert_frame_equal(calculate_player_average_scores(df), expected_result) -``` - -::::::::::::::::::::::::::::::::: - - -:::::::::::::::::::::::::::::::::::::::::::::::: - - -::::::::::::::::::::::::::::::::::::: keypoints - -- You can test equality of lists and dictionaries using the `==` operator. -- Numpy arrays cannot be compared using the `==` operator. Instead, use `numpy.testing.assert_array_equal` and `numpy.testing.assert_allclose`. -- Data structures that contain numpy arrays should be compared using `numpy.testing.assert_equal`. -- Pandas DataFrames and Series should be compared using `pandas.testing.assert_frame_equal` and `pandas.testing.assert_series_equal`. 
- -:::::::::::::::::::::::::::::::::::::::::::::::: - diff --git a/learners/files/06-data-structures/advanced/advanced_calculator.py b/learners/files/06-floating-point-data/advanced/advanced_calculator.py similarity index 100% rename from learners/files/06-data-structures/advanced/advanced_calculator.py rename to learners/files/06-floating-point-data/advanced/advanced_calculator.py diff --git a/learners/files/06-data-structures/advanced/test_advanced_calculator.py b/learners/files/06-floating-point-data/advanced/test_advanced_calculator.py similarity index 100% rename from learners/files/06-data-structures/advanced/test_advanced_calculator.py rename to learners/files/06-floating-point-data/advanced/test_advanced_calculator.py diff --git a/learners/files/06-data-structures/calculator.py b/learners/files/06-floating-point-data/calculator.py similarity index 100% rename from learners/files/06-data-structures/calculator.py rename to learners/files/06-floating-point-data/calculator.py diff --git a/learners/files/06-data-structures/data_structures.py b/learners/files/06-floating-point-data/data_structures.py similarity index 100% rename from learners/files/06-data-structures/data_structures.py rename to learners/files/06-floating-point-data/data_structures.py diff --git a/learners/files/06-floating-point-data/estimate_pi.py b/learners/files/06-floating-point-data/estimate_pi.py new file mode 100644 index 00000000..4f1bd6ba --- /dev/null +++ b/learners/files/06-floating-point-data/estimate_pi.py @@ -0,0 +1,10 @@ +import random + +def estimate_pi(iterations): + num_inside = 0 + for _ in range(iterations): + x = random.random() + y = random.random() + if x**2 + y**2 < 1: + num_inside += 1 + return 4 * num_inside / iterations diff --git a/learners/files/06-data-structures/scripts.py b/learners/files/06-floating-point-data/scripts.py similarity index 100% rename from learners/files/06-data-structures/scripts.py rename to learners/files/06-floating-point-data/scripts.py diff 
--git a/learners/files/06-data-structures/statistics/stats.py b/learners/files/06-floating-point-data/statistics/stats.py similarity index 100% rename from learners/files/06-data-structures/statistics/stats.py rename to learners/files/06-floating-point-data/statistics/stats.py diff --git a/learners/files/06-data-structures/statistics/test_stats.py b/learners/files/06-floating-point-data/statistics/test_stats.py similarity index 100% rename from learners/files/06-data-structures/statistics/test_stats.py rename to learners/files/06-floating-point-data/statistics/test_stats.py diff --git a/learners/files/06-data-structures/test_calculator.py b/learners/files/06-floating-point-data/test_calculator.py similarity index 100% rename from learners/files/06-data-structures/test_calculator.py rename to learners/files/06-floating-point-data/test_calculator.py diff --git a/learners/files/06-data-structures/test_data_structures.py b/learners/files/06-floating-point-data/test_data_structures.py similarity index 96% rename from learners/files/06-data-structures/test_data_structures.py rename to learners/files/06-floating-point-data/test_data_structures.py index 00f3cd2d..57a770d9 100644 --- a/learners/files/06-data-structures/test_data_structures.py +++ b/learners/files/06-floating-point-data/test_data_structures.py @@ -71,9 +71,9 @@ def test_numpy_arrays(): np.testing.assert_array_equal(array1, array2) -def test_nested_numpy_arrays(): - """Test that nested numpy arrays are equal""" - # Create two nested numpy arrays +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays array1 = np.array([[1, 2], [3, 4]]) array2 = np.array([[1, 2], [3, 4]]) # Check that the nested arrays are equal diff --git a/learners/files/06-floating-point-data/test_estimate_pi.py b/learners/files/06-floating-point-data/test_estimate_pi.py new file mode 100644 index 00000000..0edcd966 --- /dev/null +++ b/learners/files/06-floating-point-data/test_estimate_pi.py 
@@ -0,0 +1,15 @@ +import random +from math import fabs + +from estimate_pi import estimate_pi + +def test_estimate_pi(): + random.seed(0) + expected = 3.141592654 + actual = estimate_pi(iterations=10000) + # Test absolute tolerance + atol = 1e-2 + assert fabs(actual - expected) < atol + # Test relative tolerance + rtol = 5e-3 + assert fabs((actual - expected) / expected) < rtol diff --git a/learners/files/06-floating-point-data/test_floating_point.py b/learners/files/06-floating-point-data/test_floating_point.py new file mode 100644 index 00000000..c11c0349 --- /dev/null +++ b/learners/files/06-floating-point-data/test_floating_point.py @@ -0,0 +1,12 @@ +from math import fabs +from random import random + +def estimate_pi(iterations): + num_inside = 0 + for _ in range(iterations): + x = random() + y = random() + if x**2 + y**2 < 1: + num_inside += 1 + return 4 * num_inside / iterations + diff --git a/learners/files/07-fixtures/test_data_structures.py b/learners/files/07-fixtures/test_data_structures.py index 00f3cd2d..57a770d9 100644 --- a/learners/files/07-fixtures/test_data_structures.py +++ b/learners/files/07-fixtures/test_data_structures.py @@ -71,9 +71,9 @@ def test_numpy_arrays(): np.testing.assert_array_equal(array1, array2) -def test_nested_numpy_arrays(): - """Test that nested numpy arrays are equal""" - # Create two nested numpy arrays +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays array1 = np.array([[1, 2], [3, 4]]) array2 = np.array([[1, 2], [3, 4]]) # Check that the nested arrays are equal diff --git a/learners/files/08-parametrization/test_data_structures.py b/learners/files/08-parametrization/test_data_structures.py index 00f3cd2d..57a770d9 100644 --- a/learners/files/08-parametrization/test_data_structures.py +++ b/learners/files/08-parametrization/test_data_structures.py @@ -71,9 +71,9 @@ def test_numpy_arrays(): np.testing.assert_array_equal(array1, array2) -def test_nested_numpy_arrays(): - 
"""Test that nested numpy arrays are equal""" - # Create two nested numpy arrays +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays array1 = np.array([[1, 2], [3, 4]]) array2 = np.array([[1, 2], [3, 4]]) # Check that the nested arrays are equal diff --git a/learners/files/09-testing-output-files/test_data_structures.py b/learners/files/09-testing-output-files/test_data_structures.py index 00f3cd2d..57a770d9 100644 --- a/learners/files/09-testing-output-files/test_data_structures.py +++ b/learners/files/09-testing-output-files/test_data_structures.py @@ -71,9 +71,9 @@ def test_numpy_arrays(): np.testing.assert_array_equal(array1, array2) -def test_nested_numpy_arrays(): - """Test that nested numpy arrays are equal""" - # Create two nested numpy arrays +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays array1 = np.array([[1, 2], [3, 4]]) array2 = np.array([[1, 2], [3, 4]]) # Check that the nested arrays are equal From 477013e9be5f86203b91f4fbb7d7ec6e861ee9dc Mon Sep 17 00:00:00 2001 From: Liam Pattinson Date: Sat, 14 Feb 2026 11:24:46 +0000 Subject: [PATCH 2/3] Strip pandas material TODO: remove from stats files --- config.yaml | 2 +- episodes/06-floating-point-data.Rmd | 153 ++++-------------- .../06-floating-point-data/data_structures.py | 2 - .../test_data_structures.py | 123 -------------- .../06-floating-point-data/test_numpy.py | 27 ++++ learners/files/07-fixtures/data_structures.py | 2 - .../files/07-fixtures/test_data_structures.py | 123 -------------- learners/files/07-fixtures/test_numpy.py | 27 ++++ .../08-parametrization/data_structures.py | 2 - .../test_data_structures.py | 123 -------------- .../files/08-parametrization/test_numpy.py | 27 ++++ .../data_structures.py | 2 - .../test_data_structures.py | 123 -------------- .../09-testing-output-files/test_numpy.py | 27 ++++ learners/setup.md | 43 +++-- 15 files changed, 170 insertions(+), 636 
deletions(-) delete mode 100644 learners/files/06-floating-point-data/data_structures.py delete mode 100644 learners/files/06-floating-point-data/test_data_structures.py create mode 100644 learners/files/06-floating-point-data/test_numpy.py delete mode 100644 learners/files/07-fixtures/data_structures.py delete mode 100644 learners/files/07-fixtures/test_data_structures.py create mode 100644 learners/files/07-fixtures/test_numpy.py delete mode 100644 learners/files/08-parametrization/data_structures.py delete mode 100644 learners/files/08-parametrization/test_data_structures.py create mode 100644 learners/files/08-parametrization/test_numpy.py delete mode 100644 learners/files/09-testing-output-files/data_structures.py delete mode 100644 learners/files/09-testing-output-files/test_data_structures.py create mode 100644 learners/files/09-testing-output-files/test_numpy.py diff --git a/config.yaml b/config.yaml index 7d3de2b7..e2767493 100644 --- a/config.yaml +++ b/config.yaml @@ -65,7 +65,7 @@ episodes: - 03-interacting-with-tests.Rmd - 04-unit-tests-best-practices.Rmd - 05-testing-exceptions.Rmd -- 06-testing-data-structures.Rmd +- 06-floating-point-data.Rmd - 07-fixtures.Rmd - 08-parametrization.Rmd - 09-testing-output-files.Rmd diff --git a/episodes/06-floating-point-data.Rmd b/episodes/06-floating-point-data.Rmd index 57d1454c..91dfd3cf 100644 --- a/episodes/06-floating-point-data.Rmd +++ b/episodes/06-floating-point-data.Rmd @@ -7,14 +7,14 @@ exercises: 5 :::::::::::::::::::::::::::::::::::::: questions - What are the best practices when working with floating point data? -- How do you compare objects in libraries like `pandas` and `numpy`? +- How do you compare objects in libraries like `numpy`? :::::::::::::::::::::::::::::::::::::::::::::::: ::::::::::::::::::::::::::::::::::::: objectives - Learn how to test floating point data with tolerances. -- Learn how to compare objects in libraries like `pandas` and `numpy`. 
+- Learn how to compare objects in libraries like `numpy`. :::::::::::::::::::::::::::::::::::::::::::::::: @@ -40,8 +40,9 @@ but it's possible that this test could erroneously fail in future for reasons outside our control. This lesson will teach best practices for handling this type of data. -Libraries like `numpy` and `pandas` are commonly used to interact with large quantities -of floating point numbers, and they provide special functions to assist with testing. +Libraries like NumPy, SciPy, and Pandas are commonly used to interact +with large quantities of floating point numbers. NumPy provides special +functions to assist with testing. ### Relative and Absolute Tolerances @@ -129,6 +130,8 @@ be very different! ::::::::::::::::::::::::::::::::: +::::::::::::::::::::::::::::::::::::::::::::::: + The built-in function `math.isclose` can be used to simplify these checks: ```python @@ -164,16 +167,18 @@ def test_estimate_pi(): ::::::::::::::::::::::::::::::::: +::::::::::::::::::::::::::::::::::::::::::::::: + ### NumPy NumPy is a common library used in research. Instead of the usual `assert a == b`, NumPy has its own testing functions that are more suitable for comparing NumPy arrays. These functions are the ones you are most likely to use: -- `numpy.testing.assert_array_equal` is used to compare two NumPy arrays or array-like objects (such as list, tuples, etc). -- `numpy.testing.assert_allclose` is used to compare two NumPy arrays or array-like objects with a tolerance for floating point numbers. - -These may also be used on individual floating point numbers if you choose. +- `numpy.testing.assert_array_equal` is used to compare two NumPy arrays for + equality -- best used for integer data. +- `numpy.testing.assert_allclose` is used to compare two NumPy arrays with a + tolerance for floating point numbers. 
Here are some examples of how to use these functions: @@ -206,59 +211,23 @@ def test_numpy_arrays_with_tolerance(): np.testing.assert_allclose(array1, array2, atol=1e-3) ``` -::::::::::::::::::::::::::::::::::::: callout - -### Data structures with numpy arrays - -When you have data structures that contain numpy arrays, such as lists or dictionaries, you cannot use `==` to compare them. -Instead, you can use `numpy.testing.assert_equal` to compare the data structures. - -```python -def test_dictionaries_with_numpy_arrays(): - """Test that dictionaries with numpy arrays are equal""" - # Create two dictionaries with numpy arrays - dict1 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict2 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - # Check that the dictionaries are equal - np.testing.assert_equal(dict1, dict2) -``` - -:::::::::::::::::::::::::::::::::::::::::::::::: +The NumPy testing functions can be used on anything NumPy considers to be 'array-like'. +This includes lists, tuples, and even individual floating point numbers if you choose. +They can also be used for other objects in the scientific Python ecosystem, such +as Pandas Series/DataFrames. +:::::::::::::::::::::::: callout -### pandas +The Pandas library also provides its own testing functions: -Pandas is another common library used in research for storing and manipulating datasets. -Pandas has its own testing functions that are more suitable for comparing Pandas objects. -These two functions are the ones you are most likely to use: -- `pandas.testing.assert_frame_equal` is used to compare two Pandas DataFrames. -- `pandas.testing.assert_series_equal` is used to compare two Pandas Series. +- `pandas.testing.assert_frame_equal` +- `pandas.testing.assert_series_equal` +These functions can also take `rtol` and `atol` arguments, so can fulfill the +role of both `numpy.testing.assert_array_equal` and +`numpy.testing.assert_allclose`. 
-Here are some examples of how to use these functions: - -```python - -def test_pandas_dataframes(): - """Test that pandas DataFrames are equal""" - # Create two pandas DataFrames - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Check that the DataFrames are equal - pd.testing.assert_frame_equal(df1, df2) - -def test_pandas_series(): - """Test that pandas Series are equal""" - # Create two pandas Series - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([1, 2, 3]) - # Check that the Series are equal - pd.testing.assert_series_equal(s1, s2) -``` - -There is no equivalent to `np.assert_allclose` in Pandas. If you need to compare DataFrames -or Series containing floating point data, it is recommended to use the `np.testing` functions directly -on the Pandas objects. +:::::::::::::::::::::::::::::::: ::::::::::::::::::::::::::::::::::::: challenge @@ -299,79 +268,15 @@ def test_calculate_cumulative_sum(): ::::::::::::::::::::::::::::::::: -### Checking if Pandas DataFrames are equal - -In `statistics/stats.py` add this function to calculate the average score of each player in a Pandas DataFrame: - -```python -import pandas as pd - -def calculate_player_average_scores(df: pd.DataFrame) -> pd.DataFrame: - """Calculate the average score of each player in a pandas DataFrame. - - Example input: - | | player | score_1 | score_2 | - |---|---------|---------|---------| - | 0 | Alice | 1 | 2 | - | 1 | Bob | 3 | 4 | - - Example output: - | | player | score_1 | score_2 | average_score | - |---|---------|---------|---------|---------------| - | 0 | Alice | 1 | 2 | 1.5 | - | 1 | Bob | 3 | 4 | 3.5 | - """ - - df["average_score"] = df[["score_1", "score_2"]].mean(axis=1) - - return df -``` - -Then write a test for this function by comparing Pandas DataFrames. 
- -Hint: You can create a dataframe like this: - -```python -df = pd.DataFrame({ - "player": ["Alice", "Bob"], - "score_1": [1, 3], - "score_2": [2, 4] -}) -``` - -:::::::::::::::::::::::: solution - -```python -import pandas as pd -from stats import calculate_player_average_scores - -def test_calculate_player_average_scores(): - """Test calculate_player_average_scores function""" - df = pd.DataFrame({ - "player": ["Alice", "Bob"], - "score_1": [1, 3], - "score_2": [2, 4] - }) - expected_result = pd.DataFrame({ - "player": ["Alice", "Bob"], - "score_1": [1, 3], - "score_2": [2, 4], - "average_score": [1.5, 3.5] - }) - pd.testing.assert_frame_equal(calculate_player_average_scores(df), expected_result) -``` - -::::::::::::::::::::::::::::::::: - - :::::::::::::::::::::::::::::::::::::::::::::::: ::::::::::::::::::::::::::::::::::::: keypoints -- When comparing floating point data, you should use relative/absolute tolerances instead of testing for equality. -- Numpy arrays cannot be compared using the `==` operator. Instead, use `numpy.testing.assert_array_equal` and `numpy.testing.assert_allclose`. -- Pandas DataFrames and Series should be compared using `pandas.testing.assert_frame_equal` and `pandas.testing.assert_series_equal`. +- When comparing floating point data, you should use relative/absolute + tolerances instead of testing for equality. +- Numpy arrays cannot be compared using the `==` operator. Instead, use + `numpy.testing.assert_array_equal` and `numpy.testing.assert_allclose`. 
:::::::::::::::::::::::::::::::::::::::::::::::: diff --git a/learners/files/06-floating-point-data/data_structures.py b/learners/files/06-floating-point-data/data_structures.py deleted file mode 100644 index df39e65e..00000000 --- a/learners/files/06-floating-point-data/data_structures.py +++ /dev/null @@ -1,2 +0,0 @@ -import numpy as np -import pandas as pd diff --git a/learners/files/06-floating-point-data/test_data_structures.py b/learners/files/06-floating-point-data/test_data_structures.py deleted file mode 100644 index 57a770d9..00000000 --- a/learners/files/06-floating-point-data/test_data_structures.py +++ /dev/null @@ -1,123 +0,0 @@ -import numpy as np -import pandas as pd - - -def test_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert list1 == list2 - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert list3 != list4 - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert list5 != list6 - - -def test_sorted_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert sorted(list1) == sorted(list2) - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert sorted(list3) == sorted(list4) - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert sorted(list5) != sorted(list6) - - -def test_dictionaries_equal(): - """Test that dictionaries are equal""" - # Create two dictionaries - dict1 = {"a": 1, "b": 2, "c": 3} - dict2 = {"a": 1, "b": 2, "c": 3} - # Check that the dictionaries are equal - assert dict1 == dict2 - - # Create two dictionaries, different order - dict3 = {"a": 1, "b": 2, "c": 3} - dict4 = {"c": 3, "b": 2, "a": 1} - assert dict3 == dict4 - - # Create two different 
dictionaries - dict5 = {"a": 1, "b": 2, "c": 3} - dict6 = {"a": 1, "b": 2, "c": 4} - # Check that the dictionaries are not equal - assert dict5 != dict6 - - -def test_numpy_arrays(): - """Test that numpy arrays are equal""" - # Create two numpy arrays - array1 = np.array([1, 2, 3]) - array2 = np.array([1, 2, 3]) - # Check that the arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_2d_numpy_arrays(): - """Test that 2d numpy arrays are equal""" - # Create two 2d numpy arrays - array1 = np.array([[1, 2], [3, 4]]) - array2 = np.array([[1, 2], [3, 4]]) - # Check that the nested arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_numpy_arrays_with_tolerance(): - """Test that numpy arrays are equal with tolerance""" - # Create two numpy arrays - array1 = np.array([1.0, 2.0, 3.0]) - array2 = np.array([1.00009, 2.0005, 3.0001]) - # Check that the arrays are equal with tolerance - np.testing.assert_allclose(array1, array2, atol=1e-3) - - -def test_dictionaries_with_numpy_arrays(): - """Test that dictionaries with numpy arrays are equal""" - # Create two dictionaries with numpy arrays - dict1 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict2 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - # Check that the dictionaries are equal - np.testing.assert_equal(dict1, dict2) - - # Create two dictionaries with different numpy arrays - dict3 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict4 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 7])} - # Check that the dictionaries are not equal - with np.testing.assert_raises(AssertionError): - np.testing.assert_equal(dict3, dict4) - - -def test_pandas_dataframes(): - """Test that pandas DataFrames are equal""" - # Create two pandas DataFrames - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Check that the DataFrames are equal - pd.testing.assert_frame_equal(df1, df2) - - -def 
test_pandas_series(): - """Test that pandas Series are equal""" - # Create two pandas Series - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([1, 2, 3]) - # Check that the Series are equal - pd.testing.assert_series_equal(s1, s2) diff --git a/learners/files/06-floating-point-data/test_numpy.py b/learners/files/06-floating-point-data/test_numpy.py new file mode 100644 index 00000000..0eab737a --- /dev/null +++ b/learners/files/06-floating-point-data/test_numpy.py @@ -0,0 +1,27 @@ +import numpy as np + +def test_numpy_arrays(): + """Test that numpy arrays are equal""" + # Create two numpy arrays + array1 = np.array([1, 2, 3]) + array2 = np.array([1, 2, 3]) + # Check that the arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays + array1 = np.array([[1, 2], [3, 4]]) + array2 = np.array([[1, 2], [3, 4]]) + # Check that the nested arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_numpy_arrays_with_tolerance(): + """Test that numpy arrays are equal with tolerance""" + # Create two numpy arrays + array1 = np.array([1.0, 2.0, 3.0]) + array2 = np.array([1.00009, 2.0005, 3.0001]) + # Check that the arrays are equal with tolerance + np.testing.assert_allclose(array1, array2, atol=1e-3) diff --git a/learners/files/07-fixtures/data_structures.py b/learners/files/07-fixtures/data_structures.py deleted file mode 100644 index df39e65e..00000000 --- a/learners/files/07-fixtures/data_structures.py +++ /dev/null @@ -1,2 +0,0 @@ -import numpy as np -import pandas as pd diff --git a/learners/files/07-fixtures/test_data_structures.py b/learners/files/07-fixtures/test_data_structures.py deleted file mode 100644 index 57a770d9..00000000 --- a/learners/files/07-fixtures/test_data_structures.py +++ /dev/null @@ -1,123 +0,0 @@ -import numpy as np -import pandas as pd - - -def test_lists_equal(): - """Test that lists are equal""" - # Create two lists 
- list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert list1 == list2 - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert list3 != list4 - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert list5 != list6 - - -def test_sorted_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert sorted(list1) == sorted(list2) - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert sorted(list3) == sorted(list4) - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert sorted(list5) != sorted(list6) - - -def test_dictionaries_equal(): - """Test that dictionaries are equal""" - # Create two dictionaries - dict1 = {"a": 1, "b": 2, "c": 3} - dict2 = {"a": 1, "b": 2, "c": 3} - # Check that the dictionaries are equal - assert dict1 == dict2 - - # Create two dictionaries, different order - dict3 = {"a": 1, "b": 2, "c": 3} - dict4 = {"c": 3, "b": 2, "a": 1} - assert dict3 == dict4 - - # Create two different dictionaries - dict5 = {"a": 1, "b": 2, "c": 3} - dict6 = {"a": 1, "b": 2, "c": 4} - # Check that the dictionaries are not equal - assert dict5 != dict6 - - -def test_numpy_arrays(): - """Test that numpy arrays are equal""" - # Create two numpy arrays - array1 = np.array([1, 2, 3]) - array2 = np.array([1, 2, 3]) - # Check that the arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_2d_numpy_arrays(): - """Test that 2d numpy arrays are equal""" - # Create two 2d numpy arrays - array1 = np.array([[1, 2], [3, 4]]) - array2 = np.array([[1, 2], [3, 4]]) - # Check that the nested arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_numpy_arrays_with_tolerance(): - """Test that numpy arrays are equal with tolerance""" - # Create 
two numpy arrays - array1 = np.array([1.0, 2.0, 3.0]) - array2 = np.array([1.00009, 2.0005, 3.0001]) - # Check that the arrays are equal with tolerance - np.testing.assert_allclose(array1, array2, atol=1e-3) - - -def test_dictionaries_with_numpy_arrays(): - """Test that dictionaries with numpy arrays are equal""" - # Create two dictionaries with numpy arrays - dict1 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict2 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - # Check that the dictionaries are equal - np.testing.assert_equal(dict1, dict2) - - # Create two dictionaries with different numpy arrays - dict3 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict4 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 7])} - # Check that the dictionaries are not equal - with np.testing.assert_raises(AssertionError): - np.testing.assert_equal(dict3, dict4) - - -def test_pandas_dataframes(): - """Test that pandas DataFrames are equal""" - # Create two pandas DataFrames - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Check that the DataFrames are equal - pd.testing.assert_frame_equal(df1, df2) - - -def test_pandas_series(): - """Test that pandas Series are equal""" - # Create two pandas Series - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([1, 2, 3]) - # Check that the Series are equal - pd.testing.assert_series_equal(s1, s2) diff --git a/learners/files/07-fixtures/test_numpy.py b/learners/files/07-fixtures/test_numpy.py new file mode 100644 index 00000000..0eab737a --- /dev/null +++ b/learners/files/07-fixtures/test_numpy.py @@ -0,0 +1,27 @@ +import numpy as np + +def test_numpy_arrays(): + """Test that numpy arrays are equal""" + # Create two numpy arrays + array1 = np.array([1, 2, 3]) + array2 = np.array([1, 2, 3]) + # Check that the arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # 
Create two 2d numpy arrays + array1 = np.array([[1, 2], [3, 4]]) + array2 = np.array([[1, 2], [3, 4]]) + # Check that the nested arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_numpy_arrays_with_tolerance(): + """Test that numpy arrays are equal with tolerance""" + # Create two numpy arrays + array1 = np.array([1.0, 2.0, 3.0]) + array2 = np.array([1.00009, 2.0005, 3.0001]) + # Check that the arrays are equal with tolerance + np.testing.assert_allclose(array1, array2, atol=1e-3) diff --git a/learners/files/08-parametrization/data_structures.py b/learners/files/08-parametrization/data_structures.py deleted file mode 100644 index df39e65e..00000000 --- a/learners/files/08-parametrization/data_structures.py +++ /dev/null @@ -1,2 +0,0 @@ -import numpy as np -import pandas as pd diff --git a/learners/files/08-parametrization/test_data_structures.py b/learners/files/08-parametrization/test_data_structures.py deleted file mode 100644 index 57a770d9..00000000 --- a/learners/files/08-parametrization/test_data_structures.py +++ /dev/null @@ -1,123 +0,0 @@ -import numpy as np -import pandas as pd - - -def test_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert list1 == list2 - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert list3 != list4 - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert list5 != list6 - - -def test_sorted_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert sorted(list1) == sorted(list2) - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert sorted(list3) == sorted(list4) - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert 
sorted(list5) != sorted(list6) - - -def test_dictionaries_equal(): - """Test that dictionaries are equal""" - # Create two dictionaries - dict1 = {"a": 1, "b": 2, "c": 3} - dict2 = {"a": 1, "b": 2, "c": 3} - # Check that the dictionaries are equal - assert dict1 == dict2 - - # Create two dictionaries, different order - dict3 = {"a": 1, "b": 2, "c": 3} - dict4 = {"c": 3, "b": 2, "a": 1} - assert dict3 == dict4 - - # Create two different dictionaries - dict5 = {"a": 1, "b": 2, "c": 3} - dict6 = {"a": 1, "b": 2, "c": 4} - # Check that the dictionaries are not equal - assert dict5 != dict6 - - -def test_numpy_arrays(): - """Test that numpy arrays are equal""" - # Create two numpy arrays - array1 = np.array([1, 2, 3]) - array2 = np.array([1, 2, 3]) - # Check that the arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_2d_numpy_arrays(): - """Test that 2d numpy arrays are equal""" - # Create two 2d numpy arrays - array1 = np.array([[1, 2], [3, 4]]) - array2 = np.array([[1, 2], [3, 4]]) - # Check that the nested arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_numpy_arrays_with_tolerance(): - """Test that numpy arrays are equal with tolerance""" - # Create two numpy arrays - array1 = np.array([1.0, 2.0, 3.0]) - array2 = np.array([1.00009, 2.0005, 3.0001]) - # Check that the arrays are equal with tolerance - np.testing.assert_allclose(array1, array2, atol=1e-3) - - -def test_dictionaries_with_numpy_arrays(): - """Test that dictionaries with numpy arrays are equal""" - # Create two dictionaries with numpy arrays - dict1 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict2 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - # Check that the dictionaries are equal - np.testing.assert_equal(dict1, dict2) - - # Create two dictionaries with different numpy arrays - dict3 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict4 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 7])} - # Check that the 
dictionaries are not equal - with np.testing.assert_raises(AssertionError): - np.testing.assert_equal(dict3, dict4) - - -def test_pandas_dataframes(): - """Test that pandas DataFrames are equal""" - # Create two pandas DataFrames - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Check that the DataFrames are equal - pd.testing.assert_frame_equal(df1, df2) - - -def test_pandas_series(): - """Test that pandas Series are equal""" - # Create two pandas Series - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([1, 2, 3]) - # Check that the Series are equal - pd.testing.assert_series_equal(s1, s2) diff --git a/learners/files/08-parametrization/test_numpy.py b/learners/files/08-parametrization/test_numpy.py new file mode 100644 index 00000000..0eab737a --- /dev/null +++ b/learners/files/08-parametrization/test_numpy.py @@ -0,0 +1,27 @@ +import numpy as np + +def test_numpy_arrays(): + """Test that numpy arrays are equal""" + # Create two numpy arrays + array1 = np.array([1, 2, 3]) + array2 = np.array([1, 2, 3]) + # Check that the arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays + array1 = np.array([[1, 2], [3, 4]]) + array2 = np.array([[1, 2], [3, 4]]) + # Check that the nested arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_numpy_arrays_with_tolerance(): + """Test that numpy arrays are equal with tolerance""" + # Create two numpy arrays + array1 = np.array([1.0, 2.0, 3.0]) + array2 = np.array([1.00009, 2.0005, 3.0001]) + # Check that the arrays are equal with tolerance + np.testing.assert_allclose(array1, array2, atol=1e-3) diff --git a/learners/files/09-testing-output-files/data_structures.py b/learners/files/09-testing-output-files/data_structures.py deleted file mode 100644 index df39e65e..00000000 --- 
a/learners/files/09-testing-output-files/data_structures.py +++ /dev/null @@ -1,2 +0,0 @@ -import numpy as np -import pandas as pd diff --git a/learners/files/09-testing-output-files/test_data_structures.py b/learners/files/09-testing-output-files/test_data_structures.py deleted file mode 100644 index 57a770d9..00000000 --- a/learners/files/09-testing-output-files/test_data_structures.py +++ /dev/null @@ -1,123 +0,0 @@ -import numpy as np -import pandas as pd - - -def test_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert list1 == list2 - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert list3 != list4 - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert list5 != list6 - - -def test_sorted_lists_equal(): - """Test that lists are equal""" - # Create two lists - list1 = [1, 2, 3] - list2 = [1, 2, 3] - # Check that the lists are equal - assert sorted(list1) == sorted(list2) - - # Two lists, different order - list3 = [1, 2, 3] - list4 = [3, 2, 1] - assert sorted(list3) == sorted(list4) - - # Create two different lists - list5 = [1, 2, 3] - list6 = [1, 2, 4] - # Check that the lists are not equal - assert sorted(list5) != sorted(list6) - - -def test_dictionaries_equal(): - """Test that dictionaries are equal""" - # Create two dictionaries - dict1 = {"a": 1, "b": 2, "c": 3} - dict2 = {"a": 1, "b": 2, "c": 3} - # Check that the dictionaries are equal - assert dict1 == dict2 - - # Create two dictionaries, different order - dict3 = {"a": 1, "b": 2, "c": 3} - dict4 = {"c": 3, "b": 2, "a": 1} - assert dict3 == dict4 - - # Create two different dictionaries - dict5 = {"a": 1, "b": 2, "c": 3} - dict6 = {"a": 1, "b": 2, "c": 4} - # Check that the dictionaries are not equal - assert dict5 != dict6 - - -def test_numpy_arrays(): - """Test that numpy arrays are equal""" - # Create two 
numpy arrays - array1 = np.array([1, 2, 3]) - array2 = np.array([1, 2, 3]) - # Check that the arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_2d_numpy_arrays(): - """Test that 2d numpy arrays are equal""" - # Create two 2d numpy arrays - array1 = np.array([[1, 2], [3, 4]]) - array2 = np.array([[1, 2], [3, 4]]) - # Check that the nested arrays are equal - np.testing.assert_array_equal(array1, array2) - - -def test_numpy_arrays_with_tolerance(): - """Test that numpy arrays are equal with tolerance""" - # Create two numpy arrays - array1 = np.array([1.0, 2.0, 3.0]) - array2 = np.array([1.00009, 2.0005, 3.0001]) - # Check that the arrays are equal with tolerance - np.testing.assert_allclose(array1, array2, atol=1e-3) - - -def test_dictionaries_with_numpy_arrays(): - """Test that dictionaries with numpy arrays are equal""" - # Create two dictionaries with numpy arrays - dict1 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict2 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - # Check that the dictionaries are equal - np.testing.assert_equal(dict1, dict2) - - # Create two dictionaries with different numpy arrays - dict3 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6])} - dict4 = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 7])} - # Check that the dictionaries are not equal - with np.testing.assert_raises(AssertionError): - np.testing.assert_equal(dict3, dict4) - - -def test_pandas_dataframes(): - """Test that pandas DataFrames are equal""" - # Create two pandas DataFrames - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Check that the DataFrames are equal - pd.testing.assert_frame_equal(df1, df2) - - -def test_pandas_series(): - """Test that pandas Series are equal""" - # Create two pandas Series - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([1, 2, 3]) - # Check that the Series are equal - pd.testing.assert_series_equal(s1, s2) diff --git 
a/learners/files/09-testing-output-files/test_numpy.py b/learners/files/09-testing-output-files/test_numpy.py new file mode 100644 index 00000000..0eab737a --- /dev/null +++ b/learners/files/09-testing-output-files/test_numpy.py @@ -0,0 +1,27 @@ +import numpy as np + +def test_numpy_arrays(): + """Test that numpy arrays are equal""" + # Create two numpy arrays + array1 = np.array([1, 2, 3]) + array2 = np.array([1, 2, 3]) + # Check that the arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_2d_numpy_arrays(): + """Test that 2d numpy arrays are equal""" + # Create two 2d numpy arrays + array1 = np.array([[1, 2], [3, 4]]) + array2 = np.array([[1, 2], [3, 4]]) + # Check that the nested arrays are equal + np.testing.assert_array_equal(array1, array2) + + +def test_numpy_arrays_with_tolerance(): + """Test that numpy arrays are equal with tolerance""" + # Create two numpy arrays + array1 = np.array([1.0, 2.0, 3.0]) + array2 = np.array([1.00009, 2.0005, 3.0001]) + # Check that the arrays are equal with tolerance + np.testing.assert_allclose(array1, array2, atol=1e-3) diff --git a/learners/setup.md b/learners/setup.md index bf0b22e4..6a12447d 100644 --- a/learners/setup.md +++ b/learners/setup.md @@ -2,26 +2,44 @@ title: Setup --- -## Python testing for research +## Testing and Continuous Integration -This course aims to equip you with the tools and knowledge required to get started with software testing. It assumes no prior knowledge of testing, just basic familiarity with Python programming. Over the course of these lessons, you will learn what software testing entails, how to write tests, best practices, some more niche & powerful functionality and finally how to incorporate tests in a GitHub repository. +This course aims to equip you with the tools and knowledge required to get +started with software testing. It assumes no prior knowledge of testing, just +basic familiarity with Python programming. 
Over the course of these lessons, +you will learn what software testing entails, how to write tests, best +practices, some more niche & powerful functionality and finally how to +incorporate tests in a GitHub repository. ## Software Setup -Please complete these setup instructions before the course starts. This is to ensure that the course can start on time and all of the content can be covered. If you have any issues with the setup instructions, please reach out to a course instructor / coordinator. +Please complete these setup instructions before the course starts. This is to +ensure that the course can start on time and all of the content can be covered. +If you have any issues with the setup instructions, please reach out to a +course instructor / coordinator. For this course, you will need: +### A Text Editor +Preferably a code editor like Visual Studio Code but any text editor will do, +such as notepad. This is so that you can write and edit Python scripts. A code +editor will provide a better experience for writing code in this course. We +recommend Visual Studio Code as it is free and very popular with minimal setup +required. + ### A Terminal -Such as Terminal on MacOS / Linux or command prompt on Windows. This is so that you can run Python scripts and commit code to GitHub. +Such as Terminal on MacOS / Linux or command prompt on Windows. This is so that +you can run Python scripts and commit code to GitHub. Note that Visual Studio +Code provides both a terminal and Git integration. -### A Text Editor -Preferably a code editor like Visual Studio Code but any text editor will do, such as notepad. This is so that you can write and edit Python scripts. A code editor will provide a better experience for writing code in this course. We recommend Visual Studio Code as it is free and very popular with minimal setup required. ### Python -Preferably Python 3.10 or 3.11. 
You can download Python from [Python's official website](https://www.python.org/downloads/) +Preferably Python 3.12 or higher. You can download Python from [Python's +official website](https://www.python.org/downloads/). -It is recommended that you use a virtual environment for this course. This can be a standard Python virtual environment or a conda environment. You can create a virtual environment using the following commands: +It is recommended that you use a virtual environment for this course. This can +be a standard Python virtual environment or a conda environment. You can create +a virtual environment using the following commands: ```bash # For a standard Python virtual environment @@ -33,14 +51,17 @@ conda create --name myenv conda activate myenv ``` -There are some python packages that will be needed in this course, you can install them using the following command: +There are some python packages that will be needed in this course, you can +install them using the following command: ```bash -pip install numpy pandas matplotlib pytest pytest-regtest pytest-mpl +pip install numpy pytest snaptol ``` ### Git -This course touches on some features of GitHub and requires Git to be installed. You can download Git from the [official Git website](https://git-scm.com/downloads). If this is your first time using Git, you may want to check out the [Git Handbook](https://guides.github.com/introduction/git-handbook/). +This course touches on some features of GitHub and requires Git to be installed. You +may find it helpful to view the material from our course [Introduction to Git +and GitHub](https://researchcodingclub.github.io/course/#version-control-introduction-to-git-and-github). ### A GitHub account A GitHub account is required for the Continuous Integration section of this course.
From 9d5ad2e980f4249fc8dd7e6cf4f36887fbd23e66 Mon Sep 17 00:00:00 2001 From: Liam Pattinson Date: Sat, 14 Feb 2026 11:47:18 +0000 Subject: [PATCH 3/3] Simplify example files --- .../statistics/stats.py | 104 ------------------ .../statistics/test_stats.py | 55 --------- .../files/07-fixtures/statistics/stats.py | 104 ------------------ .../07-fixtures/statistics/test_stats.py | 54 --------- .../08-parametrization/statistics/stats.py | 104 ------------------ .../statistics/test_stats.py | 54 --------- .../statistics/stats.py | 104 ------------------ .../statistics/test_stats.py | 55 --------- 8 files changed, 634 deletions(-) diff --git a/learners/files/06-floating-point-data/statistics/stats.py b/learners/files/06-floating-point-data/statistics/stats.py index 93eea5d3..581a3791 100644 --- a/learners/files/06-floating-point-data/statistics/stats.py +++ b/learners/files/06-floating-point-data/statistics/stats.py @@ -1,6 +1,3 @@ -import numpy as np -import pandas as pd - import random @@ -35,104 +32,3 @@ def randomly_sample_and_filter_participants( age_filtered_participants = filter_participants_by_age(sampled_participants, min_age, max_age) height_filtered_participants = filter_participants_by_height(age_filtered_participants, min_height, max_height) return height_filtered_participants - - -def remove_anomalies(data: list, maximum_value: float, minimum_value: float) -> list: - """Remove anomalies from a list of numbers""" - - result = [] - - for value in data: - if minimum_value <= value <= maximum_value: - result.append(value) - - return result - - -def calculate_frequency(data: list) -> dict: - """Calculate the frequency of each element in a list""" - - frequencies = {} - - # Iterate over each value in the list - for value in data: - # If the value is already in the dictionary, increment the count - if value in frequencies: - frequencies[value] += 1 - # Otherwise, add the value to the dictionary with a count of 1 - else: - frequencies[value] = 1 - - return 
frequencies - - -def calculate_cumulative_sum(array: np.ndarray) -> np.ndarray: - """Calculate the cumulative sum of a numpy array""" - - # don't use the built-in numpy function - result = np.zeros(array.shape) - result[0] = array[0] - for i in range(1, len(array)): - result[i] = result[i - 1] + array[i] - - return result - - -def calculate_player_total_scores(participants: dict): - """Calculate the total score of each player in a dictionary. - - Example input: - { - "Alice": { - "scores": np.array([1, 2, 3]) - }, - "Bob": { - "scores": np.array([4, 5, 6]) - }, - "Charlie": { - "scores": np.array([7, 8, 9]) - }, - } - - Example output: - { - "Alice": { - "scores": np.array([1, 2, 3]), - "total_score": 6 - }, - "Bob": { - "scores": np.array([4, 5, 6]), - "total_score": 15 - }, - "Charlie": { - "scores": np.array([7, 8, 9]), - "total_score": 24 - }, - } - """ - - for player in participants: - participants[player]["total_score"] = np.sum(participants[player]["scores"]) - - return participants - - -def calculate_player_average_scores(df: pd.DataFrame) -> pd.DataFrame: - """Calculate the average score of each player in a pandas DataFrame. 
- - Example input: - | | player | score_1 | score_2 | - |---|---------|---------|---------| - | 0 | Alice | 1 | 2 | - | 1 | Bob | 3 | 4 | - - Example output: - | | player | score_1 | score_2 | average_score | - |---|---------|---------|---------|---------------| - | 0 | Alice | 1 | 2 | 1.5 | - | 1 | Bob | 3 | 4 | 3.5 | - """ - - df["average_score"] = df[["score_1", "score_2"]].mean(axis=1) - - return df diff --git a/learners/files/06-floating-point-data/statistics/test_stats.py b/learners/files/06-floating-point-data/statistics/test_stats.py index 573902a9..fd761486 100644 --- a/learners/files/06-floating-point-data/statistics/test_stats.py +++ b/learners/files/06-floating-point-data/statistics/test_stats.py @@ -1,16 +1,8 @@ -import numpy as np -import pandas as pd - from stats import ( sample_participants, filter_participants_by_age, filter_participants_by_height, randomly_sample_and_filter_participants, - remove_anomalies, - calculate_frequency, - calculate_cumulative_sum, - calculate_player_total_scores, - calculate_player_average_scores, ) import random @@ -88,50 +80,3 @@ def test_randomly_sample_and_filter_participants(): ) expected = [{"age": 38, "height": 165}, {"age": 30, "height": 170}, {"age": 35, "height": 160}] assert filtered_participants == expected - - -def test_remove_anomalies(): - """Test remove_anomalies function""" - data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - maximum_value = 5 - minimum_value = 2 - expected_result = [2, 3, 4, 5] - assert remove_anomalies(data, maximum_value, minimum_value) == expected_result - - -def test_calculate_frequency(): - """Test calculate_frequency function""" - data = [1, 2, 3, 1, 2, 1, 1, 3, 3, 3] - expected_result = {1: 4, 2: 2, 3: 4} - assert calculate_frequency(data) == expected_result - - -def test_calculate_cumulative_sum(): - """Test calculate_cumulative_sum function""" - array = np.array([1, 2, 3, 4, 5]) - expected_result = np.array([1, 3, 6, 10, 15]) - 
np.testing.assert_array_equal(calculate_cumulative_sum(array), expected_result) - - -def test_calculate_player_total_scores(): - """Test calculate_player_total_scores function""" - participants = { - "Alice": {"scores": np.array([1, 2, 3])}, - "Bob": {"scores": np.array([4, 5, 6])}, - "Charlie": {"scores": np.array([7, 8, 9])}, - } - expected_result = { - "Alice": {"scores": np.array([1, 2, 3]), "total_score": 6}, - "Bob": {"scores": np.array([4, 5, 6]), "total_score": 15}, - "Charlie": {"scores": np.array([7, 8, 9]), "total_score": 24}, - } - np.testing.assert_equal(calculate_player_total_scores(participants), expected_result) - - -def test_calculate_player_average_scores(): - """Test calculate_player_average_scores function""" - df = pd.DataFrame({"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4]}) - expected_result = pd.DataFrame( - {"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4], "average_score": [1.5, 3.5]} - ) - pd.testing.assert_frame_equal(calculate_player_average_scores(df), expected_result) diff --git a/learners/files/07-fixtures/statistics/stats.py b/learners/files/07-fixtures/statistics/stats.py index 93eea5d3..581a3791 100644 --- a/learners/files/07-fixtures/statistics/stats.py +++ b/learners/files/07-fixtures/statistics/stats.py @@ -1,6 +1,3 @@ -import numpy as np -import pandas as pd - import random @@ -35,104 +32,3 @@ def randomly_sample_and_filter_participants( age_filtered_participants = filter_participants_by_age(sampled_participants, min_age, max_age) height_filtered_participants = filter_participants_by_height(age_filtered_participants, min_height, max_height) return height_filtered_participants - - -def remove_anomalies(data: list, maximum_value: float, minimum_value: float) -> list: - """Remove anomalies from a list of numbers""" - - result = [] - - for value in data: - if minimum_value <= value <= maximum_value: - result.append(value) - - return result - - -def calculate_frequency(data: list) -> dict: - 
"""Calculate the frequency of each element in a list""" - - frequencies = {} - - # Iterate over each value in the list - for value in data: - # If the value is already in the dictionary, increment the count - if value in frequencies: - frequencies[value] += 1 - # Otherwise, add the value to the dictionary with a count of 1 - else: - frequencies[value] = 1 - - return frequencies - - -def calculate_cumulative_sum(array: np.ndarray) -> np.ndarray: - """Calculate the cumulative sum of a numpy array""" - - # don't use the built-in numpy function - result = np.zeros(array.shape) - result[0] = array[0] - for i in range(1, len(array)): - result[i] = result[i - 1] + array[i] - - return result - - -def calculate_player_total_scores(participants: dict): - """Calculate the total score of each player in a dictionary. - - Example input: - { - "Alice": { - "scores": np.array([1, 2, 3]) - }, - "Bob": { - "scores": np.array([4, 5, 6]) - }, - "Charlie": { - "scores": np.array([7, 8, 9]) - }, - } - - Example output: - { - "Alice": { - "scores": np.array([1, 2, 3]), - "total_score": 6 - }, - "Bob": { - "scores": np.array([4, 5, 6]), - "total_score": 15 - }, - "Charlie": { - "scores": np.array([7, 8, 9]), - "total_score": 24 - }, - } - """ - - for player in participants: - participants[player]["total_score"] = np.sum(participants[player]["scores"]) - - return participants - - -def calculate_player_average_scores(df: pd.DataFrame) -> pd.DataFrame: - """Calculate the average score of each player in a pandas DataFrame. 
- - Example input: - | | player | score_1 | score_2 | - |---|---------|---------|---------| - | 0 | Alice | 1 | 2 | - | 1 | Bob | 3 | 4 | - - Example output: - | | player | score_1 | score_2 | average_score | - |---|---------|---------|---------|---------------| - | 0 | Alice | 1 | 2 | 1.5 | - | 1 | Bob | 3 | 4 | 3.5 | - """ - - df["average_score"] = df[["score_1", "score_2"]].mean(axis=1) - - return df diff --git a/learners/files/07-fixtures/statistics/test_stats.py b/learners/files/07-fixtures/statistics/test_stats.py index fda2fc89..806c3539 100644 --- a/learners/files/07-fixtures/statistics/test_stats.py +++ b/learners/files/07-fixtures/statistics/test_stats.py @@ -1,5 +1,3 @@ -import numpy as np -import pandas as pd import pytest from stats import ( @@ -7,11 +5,6 @@ filter_participants_by_age, filter_participants_by_height, randomly_sample_and_filter_participants, - remove_anomalies, - calculate_frequency, - calculate_cumulative_sum, - calculate_player_total_scores, - calculate_player_average_scores, ) import random @@ -69,50 +62,3 @@ def test_randomly_sample_and_filter_participants(participants): ) expected = [{"age": 38, "height": 165}, {"age": 30, "height": 170}, {"age": 35, "height": 160}] assert filtered_participants == expected - - -def test_remove_anomalies(): - """Test remove_anomalies function""" - data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - maximum_value = 5 - minimum_value = 2 - expected_result = [2, 3, 4, 5] - assert remove_anomalies(data, maximum_value, minimum_value) == expected_result - - -def test_calculate_frequency(): - """Test calculate_frequency function""" - data = [1, 2, 3, 1, 2, 1, 1, 3, 3, 3] - expected_result = {1: 4, 2: 2, 3: 4} - assert calculate_frequency(data) == expected_result - - -def test_calculate_cumulative_sum(): - """Test calculate_cumulative_sum function""" - array = np.array([1, 2, 3, 4, 5]) - expected_result = np.array([1, 3, 6, 10, 15]) - np.testing.assert_array_equal(calculate_cumulative_sum(array), expected_result) - - 
-def test_calculate_player_total_scores(): - """Test calculate_player_total_scores function""" - participants = { - "Alice": {"scores": np.array([1, 2, 3])}, - "Bob": {"scores": np.array([4, 5, 6])}, - "Charlie": {"scores": np.array([7, 8, 9])}, - } - expected_result = { - "Alice": {"scores": np.array([1, 2, 3]), "total_score": 6}, - "Bob": {"scores": np.array([4, 5, 6]), "total_score": 15}, - "Charlie": {"scores": np.array([7, 8, 9]), "total_score": 24}, - } - np.testing.assert_equal(calculate_player_total_scores(participants), expected_result) - - -def test_calculate_player_average_scores(): - """Test calculate_player_average_scores function""" - df = pd.DataFrame({"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4]}) - expected_result = pd.DataFrame( - {"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4], "average_score": [1.5, 3.5]} - ) - pd.testing.assert_frame_equal(calculate_player_average_scores(df), expected_result) diff --git a/learners/files/08-parametrization/statistics/stats.py b/learners/files/08-parametrization/statistics/stats.py index 93eea5d3..581a3791 100644 --- a/learners/files/08-parametrization/statistics/stats.py +++ b/learners/files/08-parametrization/statistics/stats.py @@ -1,6 +1,3 @@ -import numpy as np -import pandas as pd - import random @@ -35,104 +32,3 @@ def randomly_sample_and_filter_participants( age_filtered_participants = filter_participants_by_age(sampled_participants, min_age, max_age) height_filtered_participants = filter_participants_by_height(age_filtered_participants, min_height, max_height) return height_filtered_participants - - -def remove_anomalies(data: list, maximum_value: float, minimum_value: float) -> list: - """Remove anomalies from a list of numbers""" - - result = [] - - for value in data: - if minimum_value <= value <= maximum_value: - result.append(value) - - return result - - -def calculate_frequency(data: list) -> dict: - """Calculate the frequency of each element in a list""" - - 
frequencies = {} - - # Iterate over each value in the list - for value in data: - # If the value is already in the dictionary, increment the count - if value in frequencies: - frequencies[value] += 1 - # Otherwise, add the value to the dictionary with a count of 1 - else: - frequencies[value] = 1 - - return frequencies - - -def calculate_cumulative_sum(array: np.ndarray) -> np.ndarray: - """Calculate the cumulative sum of a numpy array""" - - # don't use the built-in numpy function - result = np.zeros(array.shape) - result[0] = array[0] - for i in range(1, len(array)): - result[i] = result[i - 1] + array[i] - - return result - - -def calculate_player_total_scores(participants: dict): - """Calculate the total score of each player in a dictionary. - - Example input: - { - "Alice": { - "scores": np.array([1, 2, 3]) - }, - "Bob": { - "scores": np.array([4, 5, 6]) - }, - "Charlie": { - "scores": np.array([7, 8, 9]) - }, - } - - Example output: - { - "Alice": { - "scores": np.array([1, 2, 3]), - "total_score": 6 - }, - "Bob": { - "scores": np.array([4, 5, 6]), - "total_score": 15 - }, - "Charlie": { - "scores": np.array([7, 8, 9]), - "total_score": 24 - }, - } - """ - - for player in participants: - participants[player]["total_score"] = np.sum(participants[player]["scores"]) - - return participants - - -def calculate_player_average_scores(df: pd.DataFrame) -> pd.DataFrame: - """Calculate the average score of each player in a pandas DataFrame. 
- - Example input: - | | player | score_1 | score_2 | - |---|---------|---------|---------| - | 0 | Alice | 1 | 2 | - | 1 | Bob | 3 | 4 | - - Example output: - | | player | score_1 | score_2 | average_score | - |---|---------|---------|---------|---------------| - | 0 | Alice | 1 | 2 | 1.5 | - | 1 | Bob | 3 | 4 | 3.5 | - """ - - df["average_score"] = df[["score_1", "score_2"]].mean(axis=1) - - return df diff --git a/learners/files/08-parametrization/statistics/test_stats.py b/learners/files/08-parametrization/statistics/test_stats.py index fda2fc89..806c3539 100644 --- a/learners/files/08-parametrization/statistics/test_stats.py +++ b/learners/files/08-parametrization/statistics/test_stats.py @@ -1,5 +1,3 @@ -import numpy as np -import pandas as pd import pytest from stats import ( @@ -7,11 +5,6 @@ filter_participants_by_age, filter_participants_by_height, randomly_sample_and_filter_participants, - remove_anomalies, - calculate_frequency, - calculate_cumulative_sum, - calculate_player_total_scores, - calculate_player_average_scores, ) import random @@ -69,50 +62,3 @@ def test_randomly_sample_and_filter_participants(participants): ) expected = [{"age": 38, "height": 165}, {"age": 30, "height": 170}, {"age": 35, "height": 160}] assert filtered_participants == expected - - -def test_remove_anomalies(): - """Test remove_anomalies function""" - data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - maximum_value = 5 - minimum_value = 2 - expected_result = [2, 3, 4, 5] - assert remove_anomalies(data, maximum_value, minimum_value) == expected_result - - -def test_calculate_frequency(): - """Test calculate_frequency function""" - data = [1, 2, 3, 1, 2, 1, 1, 3, 3, 3] - expected_result = {1: 4, 2: 2, 3: 4} - assert calculate_frequency(data) == expected_result - - -def test_calculate_cumulative_sum(): - """Test calculate_cumulative_sum function""" - array = np.array([1, 2, 3, 4, 5]) - expected_result = np.array([1, 3, 6, 10, 15]) - 
np.testing.assert_array_equal(calculate_cumulative_sum(array), expected_result) - - -def test_calculate_player_total_scores(): - """Test calculate_player_total_scores function""" - participants = { - "Alice": {"scores": np.array([1, 2, 3])}, - "Bob": {"scores": np.array([4, 5, 6])}, - "Charlie": {"scores": np.array([7, 8, 9])}, - } - expected_result = { - "Alice": {"scores": np.array([1, 2, 3]), "total_score": 6}, - "Bob": {"scores": np.array([4, 5, 6]), "total_score": 15}, - "Charlie": {"scores": np.array([7, 8, 9]), "total_score": 24}, - } - np.testing.assert_equal(calculate_player_total_scores(participants), expected_result) - - -def test_calculate_player_average_scores(): - """Test calculate_player_average_scores function""" - df = pd.DataFrame({"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4]}) - expected_result = pd.DataFrame( - {"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4], "average_score": [1.5, 3.5]} - ) - pd.testing.assert_frame_equal(calculate_player_average_scores(df), expected_result) diff --git a/learners/files/09-testing-output-files/statistics/stats.py b/learners/files/09-testing-output-files/statistics/stats.py index d6d8ffc7..8cf18ecb 100644 --- a/learners/files/09-testing-output-files/statistics/stats.py +++ b/learners/files/09-testing-output-files/statistics/stats.py @@ -1,6 +1,3 @@ -import numpy as np -import pandas as pd - import random @@ -37,107 +34,6 @@ def randomly_sample_and_filter_participants( return height_filtered_participants -def remove_anomalies(data: list, maximum_value: float, minimum_value: float) -> list: - """Remove anomalies from a list of numbers""" - - result = [] - - for value in data: - if minimum_value <= value <= maximum_value: - result.append(value) - - return result - - -def calculate_frequency(data: list) -> dict: - """Calculate the frequency of each element in a list""" - - frequencies = {} - - # Iterate over each value in the list - for value in data: - # If the value is already 
in the dictionary, increment the count - if value in frequencies: - frequencies[value] += 1 - # Otherwise, add the value to the dictionary with a count of 1 - else: - frequencies[value] = 1 - - return frequencies - - -def calculate_cumulative_sum(array: np.ndarray) -> np.ndarray: - """Calculate the cumulative sum of a numpy array""" - - # don't use the built-in numpy function - result = np.zeros(array.shape) - result[0] = array[0] - for i in range(1, len(array)): - result[i] = result[i - 1] + array[i] - - return result - - -def calculate_player_total_scores(participants: dict): - """Calculate the total score of each player in a dictionary. - - Example input: - { - "Alice": { - "scores": np.array([1, 2, 3]) - }, - "Bob": { - "scores": np.array([4, 5, 6]) - }, - "Charlie": { - "scores": np.array([7, 8, 9]) - }, - } - - Example output: - { - "Alice": { - "scores": np.array([1, 2, 3]), - "total_score": 6 - }, - "Bob": { - "scores": np.array([4, 5, 6]), - "total_score": 15 - }, - "Charlie": { - "scores": np.array([7, 8, 9]), - "total_score": 24 - }, - } - """ - - for player in participants: - participants[player]["total_score"] = np.sum(participants[player]["scores"]) - - return participants - - -def calculate_player_average_scores(df: pd.DataFrame) -> pd.DataFrame: - """Calculate the average score of each player in a pandas DataFrame. 
- - Example input: - | | player | score_1 | score_2 | - |---|---------|---------|---------| - | 0 | Alice | 1 | 2 | - | 1 | Bob | 3 | 4 | - - Example output: - | | player | score_1 | score_2 | average_score | - |---|---------|---------|---------|---------------| - | 0 | Alice | 1 | 2 | 1.5 | - | 1 | Bob | 3 | 4 | 3.5 | - """ - - df["average_score"] = df[["score_1", "score_2"]].mean(axis=1) - - return df - - def very_complex_processing(data: list): # Do some very complex processing diff --git a/learners/files/09-testing-output-files/statistics/test_stats.py b/learners/files/09-testing-output-files/statistics/test_stats.py index 56e8ba05..5c7ab195 100644 --- a/learners/files/09-testing-output-files/statistics/test_stats.py +++ b/learners/files/09-testing-output-files/statistics/test_stats.py @@ -1,5 +1,3 @@ -import numpy as np -import pandas as pd import pytest from stats import ( @@ -7,11 +5,6 @@ filter_participants_by_age, filter_participants_by_height, randomly_sample_and_filter_participants, - remove_anomalies, - calculate_frequency, - calculate_cumulative_sum, - calculate_player_total_scores, - calculate_player_average_scores, very_complex_processing, ) @@ -71,54 +64,6 @@ def test_randomly_sample_and_filter_participants(participants): expected = [{"age": 38, "height": 165}, {"age": 30, "height": 170}, {"age": 35, "height": 160}] assert filtered_participants == expected - -def test_remove_anomalies(): - """Test remove_anomalies function""" - data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - maximum_value = 5 - minimum_value = 2 - expected_result = [2, 3, 4, 5] - assert remove_anomalies(data, maximum_value, minimum_value) == expected_result - - -def test_calculate_frequency(): - """Test calculate_frequency function""" - data = [1, 2, 3, 1, 2, 1, 1, 3, 3, 3] - expected_result = {1: 4, 2: 2, 3: 4} - assert calculate_frequency(data) == expected_result - - -def test_calculate_cumulative_sum(): - """Test calculate_cumulative_sum function""" - array = np.array([1, 2, 3, 4, 5]) - 
expected_result = np.array([1, 3, 6, 10, 15]) - np.testing.assert_array_equal(calculate_cumulative_sum(array), expected_result) - - -def test_calculate_player_total_scores(): - """Test calculate_player_total_scores function""" - participants = { - "Alice": {"scores": np.array([1, 2, 3])}, - "Bob": {"scores": np.array([4, 5, 6])}, - "Charlie": {"scores": np.array([7, 8, 9])}, - } - expected_result = { - "Alice": {"scores": np.array([1, 2, 3]), "total_score": 6}, - "Bob": {"scores": np.array([4, 5, 6]), "total_score": 15}, - "Charlie": {"scores": np.array([7, 8, 9]), "total_score": 24}, - } - np.testing.assert_equal(calculate_player_total_scores(participants), expected_result) - - -def test_calculate_player_average_scores(): - """Test calculate_player_average_scores function""" - df = pd.DataFrame({"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4]}) - expected_result = pd.DataFrame( - {"player": ["Alice", "Bob"], "score_1": [1, 3], "score_2": [2, 4], "average_score": [1.5, 3.5]} - ) - pd.testing.assert_frame_equal(calculate_player_average_scores(df), expected_result) - - def test_very_complex_processing(regtest): data = [1, 2, 3]