This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
soft-analytics-01/tests/test_cleaner_clean.py

84 lines
2.6 KiB
Python
Raw Permalink Normal View History

import io
import json
import os
import tarfile
import pandas as pd
import pytest
from src.cleaner.clean import clean_all, save_set
@pytest.fixture
def sample_tar_file(tmp_path):
    """Create a gzip-compressed tar archive holding one sample issue.

    The archive is written as ``sample_issues.tar.gz`` inside ``tmp_path``
    and contains a single member, ``sample_issue.json``, whose content is
    the JSON encoding of the issue dictionary built below.

    Returns:
        The archive path produced by ``os.path.join``.
    """
    # The same user appears both as the single assignee and in the list.
    assignee = {
        "login": "sample_user",
        "id": 3,
    }
    issue = {
        "id": 10001,
        "node_id": "giovanni",
        "number": 1,
        "title": "Sample Issue",
        "user": {
            "login": "test_user",
            "id": 2,
        },
        "labels": [],
        "state": "open",
        "assignee": assignee,
        "assignees": [dict(assignee)],
        "created_at": "2022-01-01T00:00:00Z",
        "body": "This is a sample issue body.",
    }
    payload: bytes = json.dumps(issue).encode()

    # TarInfo needs the exact byte size up front; addfile() reads that many
    # bytes from the file object.
    member = tarfile.TarInfo('sample_issue.json')
    member.size = len(payload)

    archive_path = os.path.join(tmp_path, "sample_issues.tar.gz")
    with tarfile.open(archive_path, 'w:gz') as archive:
        archive.addfile(member, fileobj=io.BytesIO(payload))
    return archive_path
def test_clean_all(sample_tar_file):
    """Run ``clean_all`` on the sample archive and check the single result.

    ``clean_all`` receives an empty list plus the archive path; the test
    expects it to return 0 and to leave exactly one record in the list.
    """
    collected = []
    skipped = clean_all(collected, sample_tar_file)

    assert skipped == 0  # No issues should be skipped
    assert len(collected) == 1

    # NOTE(review): the fixture's raw issue has "id": 10001 and "number": 1,
    # so 'id' == 1 presumably means clean_all exposes the issue number as
    # 'id' -- confirm against src.cleaner.clean.
    expected_fields = {
        'id': 1,
        'title': 'Sample Issue',
        'body': 'This is a sample issue body.',
        'state': 'open',
        'assignee': 'sample_user',
        'created_at': '2022-01-01T00:00:00Z',
    }
    record = collected[0]
    for field, expected in expected_fields.items():
        assert record[field] == expected
def test_save_set(tmp_path):
    """Round-trip a small DataFrame through ``save_set`` and ``pd.read_csv``.

    The frame is handed to ``save_set`` with bounds 1 and 3, the tag
    ``'test'`` and a ``test_file_`` path prefix under ``tmp_path``. The
    test then reads ``test_file_test_000001_000003.csv`` back and requires
    it to equal the original frame.
    """
    columns = {
        'title': ['Issue 1', 'Issue 2', 'Issue 3'],
        'body': ['Body 1', 'Body 2', 'Body 3'],
        'state': ['open', 'closed', 'open'],
        'assignee': ['user1', 'user2', 'user3'],
        'created_at': ['2022-01-01T00:00:00Z', '2022-01-02T00:00:00Z', '2022-01-03T00:00:00Z']
    }
    expected = pd.DataFrame(columns, index=[1, 2, 3])

    # Write the frame out through the function under test.
    prefix = os.path.join(tmp_path, 'test_file_')
    save_set(expected, 1, 3, 'test', prefix)

    # Read the CSV back, using its first column as the index.
    csv_path = os.path.join(tmp_path, 'test_file_test_000001_000003.csv')
    reloaded = pd.read_csv(csv_path, index_col=0)
    assert reloaded.equals(expected)