84 lines
2.6 KiB
Python
84 lines
2.6 KiB
Python
|
import io
|
||
|
import json
|
||
|
import os
|
||
|
import tarfile
|
||
|
|
||
|
import pandas as pd
|
||
|
import pytest
|
||
|
|
||
|
from src.cleaner.clean import clean_all, save_set
|
||
|
|
||
|
|
||
|
@pytest.fixture
def sample_tar_file(tmp_path):
    """Create a gzip-compressed tar archive containing one sample issue.

    The archive is written as ``sample_issues.tar.gz`` inside ``tmp_path`` and
    holds a single member, ``sample_issue.json``, whose content is the
    JSON-encoded sample issue defined below.

    Returns:
        The path of the created archive (result of ``os.path.join``).
    """
    archive_path = os.path.join(tmp_path, "sample_issues.tar.gz")

    # The same user record is used for both "assignee" and "assignees".
    assigned_user = {"login": "sample_user", "id": 3}
    issue = {
        "id": 10001,
        "node_id": "giovanni",
        "number": 1,
        "title": "Sample Issue",
        "user": {"login": "test_user", "id": 2},
        "labels": [],
        "state": "open",
        "assignee": dict(assigned_user),
        "assignees": [dict(assigned_user)],
        "created_at": "2022-01-01T00:00:00Z",
        "body": "This is a sample issue body.",
    }
    payload: bytes = json.dumps(issue).encode()

    # Describe the in-memory member; tarfile needs the size set explicitly
    # because the data comes from a BytesIO rather than a file on disk.
    member = tarfile.TarInfo(name='sample_issue.json')
    member.size = len(payload)

    with tarfile.open(archive_path, 'w:gz') as archive:
        archive.addfile(member, fileobj=io.BytesIO(payload))

    return archive_path
|
||
|
|
||
|
|
||
|
def test_clean_all(sample_tar_file):
    """Run clean_all on the sample archive and check the single extracted issue.

    clean_all is given an empty list and the archive path; the test expects it
    to return 0 and to leave exactly one record in the list.
    """
    collected = []
    skipped = clean_all(collected, sample_tar_file)
    assert skipped == 0  # No issues should be skipped

    # Exactly one record is expected, carrying the fields of the sample issue.
    assert len(collected) == 1
    record = collected[0]

    # NOTE(review): the fixture stores "id": 10001 and "number": 1, so the
    # expected 'id' of 1 presumably means clean_all exposes the issue number
    # as 'id' -- confirm against clean_all.
    expected_fields = {
        'id': 1,
        'title': 'Sample Issue',
        'body': 'This is a sample issue body.',
        'state': 'open',
        'assignee': 'sample_user',
        'created_at': '2022-01-01T00:00:00Z',
    }
    for field, expected in expected_fields.items():
        assert record[field] == expected
|
||
|
|
||
|
|
||
|
def test_save_set(tmp_path):
    """Save a small DataFrame with save_set and compare it to the CSV read back.

    The test expects save_set, called with bounds 1 and 3, the label 'test'
    and the prefix ``<tmp_path>/test_file_``, to produce
    ``test_file_test_000001_000003.csv`` in ``tmp_path``.
    """
    columns = {
        'title': ['Issue 1', 'Issue 2', 'Issue 3'],
        'body': ['Body 1', 'Body 2', 'Body 3'],
        'state': ['open', 'closed', 'open'],
        'assignee': ['user1', 'user2', 'user3'],
        'created_at': [
            '2022-01-01T00:00:00Z',
            '2022-01-02T00:00:00Z',
            '2022-01-03T00:00:00Z',
        ],
    }
    df = pd.DataFrame(columns, index=[1, 2, 3])

    # Write the frame out through the function under test.
    output_prefix = os.path.join(tmp_path, 'test_file_')
    save_set(df, 1, 3, 'test', output_prefix)

    # Read the expected file back, using the first column as the index.
    expected_csv = os.path.join(tmp_path, 'test_file_test_000001_000003.csv')
    loaded_df = pd.read_csv(expected_csv, index_col=0)

    assert loaded_df.equals(df)
|