diff --git a/data_preparation.ipynb b/data_preparation.ipynb
index d583338..0e86f3b 100644
--- a/data_preparation.ipynb
+++ b/data_preparation.ipynb
@@ -9,8 +9,129 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 11,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:25:19.428406Z",
+ "iopub.status.busy": "2026-03-16T14:25:19.427327Z",
+ "iopub.status.idle": "2026-03-16T14:25:21.207443Z",
+ "shell.execute_reply": "2026-03-16T14:25:21.206008Z",
+ "shell.execute_reply.started": "2026-03-16T14:25:19.428366Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " seller_id | \n",
+ " seller_zip_code_prefix | \n",
+ " seller_city | \n",
+ " seller_state | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 3442f8959a84dea7ee197c632cb2df15 | \n",
+ " 13023 | \n",
+ " campinas | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " d1b65fc7debc3361ea86b5f14c68d2e2 | \n",
+ " 13844 | \n",
+ " mogi guacu | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " ce3ad9de960102d0677a81f5d0bb7b2d | \n",
+ " 20031 | \n",
+ " rio de janeiro | \n",
+ " RJ | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " c0f3eea2e14555b6faeea3dd58c1b1c3 | \n",
+ " 4195 | \n",
+ " sao paulo | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 51a04a8a6bdcb23deccc82b0b80742cf | \n",
+ " 12914 | \n",
+ " braganca paulista | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " seller_id seller_zip_code_prefix \\\n",
+ "0 3442f8959a84dea7ee197c632cb2df15 13023 \n",
+ "1 d1b65fc7debc3361ea86b5f14c68d2e2 13844 \n",
+ "2 ce3ad9de960102d0677a81f5d0bb7b2d 20031 \n",
+ "3 c0f3eea2e14555b6faeea3dd58c1b1c3 4195 \n",
+ "4 51a04a8a6bdcb23deccc82b0b80742cf 12914 \n",
+ "\n",
+ " seller_city seller_state \n",
+ "0 campinas SP \n",
+ "1 mogi guacu SP \n",
+ "2 rio de janeiro RJ \n",
+ "3 sao paulo SP \n",
+ "4 braganca paulista SP "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import sys\n",
+ "import os\n",
+ "\n",
+ "\n",
+ "sys.path.insert(0, os.path.abspath(os.getcwd()))\n",
+ "\n",
+ "from olist.data import Olist\n",
+ "data = Olist().get_data()\n",
+ "data['sellers'].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:22:40.644246Z",
+ "iopub.status.busy": "2026-03-16T14:22:40.643244Z",
+ "iopub.status.idle": "2026-03-16T14:22:40.666105Z",
+ "shell.execute_reply": "2026-03-16T14:22:40.664962Z",
+ "shell.execute_reply.started": "2026-03-16T14:22:40.644203Z"
+ }
+ },
"outputs": [],
"source": [
"# \"magic commands\" to enable autoreload of your imported packages\n",
@@ -54,9 +175,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 2,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:22:43.795377Z",
+ "iopub.status.busy": "2026-03-16T14:22:43.794965Z",
+ "iopub.status.idle": "2026-03-16T14:22:43.816054Z",
+ "shell.execute_reply": "2026-03-16T14:22:43.814867Z",
+ "shell.execute_reply.started": "2026-03-16T14:22:43.795347Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/home/bariscan/.workintech/olist/data/csv')"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from pathlib import Path\n",
"csv_path = Path(\"~/.workintech/olist/data/csv\").expanduser()\n",
@@ -76,9 +216,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 3,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:22:46.707169Z",
+ "iopub.status.busy": "2026-03-16T14:22:46.706293Z",
+ "iopub.status.idle": "2026-03-16T14:22:46.724388Z",
+ "shell.execute_reply": "2026-03-16T14:22:46.722988Z",
+ "shell.execute_reply.started": "2026-03-16T14:22:46.707131Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[PosixPath('/home/bariscan/.workintech/olist/data/csv/product_category_name_translation.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_products_dataset.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_customers_dataset.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_order_payments_dataset.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_geolocation_dataset.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_order_items_dataset.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_orders_dataset.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_order_reviews_dataset.csv'),\n",
+ " PosixPath('/home/bariscan/.workintech/olist/data/csv/olist_sellers_dataset.csv')]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"file_paths = list(csv_path.iterdir())\n",
"file_paths"
@@ -86,9 +253,86 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 4,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:22:49.931986Z",
+ "iopub.status.busy": "2026-03-16T14:22:49.931219Z",
+ "iopub.status.idle": "2026-03-16T14:22:50.498004Z",
+ "shell.execute_reply": "2026-03-16T14:22:50.496472Z",
+ "shell.execute_reply.started": "2026-03-16T14:22:49.931947Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_category_name | \n",
+ " product_category_name_english | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " beleza_saude | \n",
+ " health_beauty | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " informatica_acessorios | \n",
+ " computers_accessories | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " automotivo | \n",
+ " auto | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cama_mesa_banho | \n",
+ " bed_bath_table | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " moveis_decoracao | \n",
+ " furniture_decor | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " product_category_name product_category_name_english\n",
+ "0 beleza_saude health_beauty\n",
+ "1 informatica_acessorios computers_accessories\n",
+ "2 automotivo auto\n",
+ "3 cama_mesa_banho bed_bath_table\n",
+ "4 moveis_decoracao furniture_decor"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Kodunuzu aşağıda test edin. Dizindeki ilk csv dosyasını yüklemeyi deneyin\n",
"import pandas as pd\n",
@@ -108,15 +352,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:22:53.721966Z",
+ "iopub.status.busy": "2026-03-16T14:22:53.720778Z",
+ "iopub.status.idle": "2026-03-16T14:22:53.742051Z",
+ "shell.execute_reply": "2026-03-16T14:22:53.740928Z",
+ "shell.execute_reply.started": "2026-03-16T14:22:53.721927Z"
+ },
"tags": [
"challengify"
]
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['product_category_name_translation.csv',\n",
+ " 'olist_products_dataset.csv',\n",
+ " 'olist_customers_dataset.csv',\n",
+ " 'olist_order_payments_dataset.csv',\n",
+ " 'olist_geolocation_dataset.csv',\n",
+ " 'olist_order_items_dataset.csv',\n",
+ " 'olist_orders_dataset.csv',\n",
+ " 'olist_order_reviews_dataset.csv',\n",
+ " 'olist_sellers_dataset.csv']"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# YOUR CODE HERE"
+ "file_names = [path.name for path in file_paths]\n",
+ "file_names"
]
},
{
@@ -140,15 +411,45 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:22:56.687132Z",
+ "iopub.status.busy": "2026-03-16T14:22:56.686004Z",
+ "iopub.status.idle": "2026-03-16T14:22:56.703979Z",
+ "shell.execute_reply": "2026-03-16T14:22:56.702976Z",
+ "shell.execute_reply.started": "2026-03-16T14:22:56.687102Z"
+ },
"tags": [
"challengify"
]
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['product_category_name_translation',\n",
+ " 'products',\n",
+ " 'customers',\n",
+ " 'order_payments',\n",
+ " 'geolocation',\n",
+ " 'order_items',\n",
+ " 'orders',\n",
+ " 'order_reviews',\n",
+ " 'sellers']"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# YOUR CODE HERE"
+ "key_names = [\n",
+ " name.replace('olist_', '').replace('_dataset.csv', '').replace('.csv', '') \n",
+ " for name in file_names\n",
+ "]\n",
+ "key_names"
]
},
{
@@ -184,15 +485,151 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:23:00.714105Z",
+ "iopub.status.busy": "2026-03-16T14:23:00.713593Z",
+ "iopub.status.idle": "2026-03-16T14:23:02.753272Z",
+ "shell.execute_reply": "2026-03-16T14:23:02.752039Z",
+ "shell.execute_reply.started": "2026-03-16T14:23:00.714071Z"
+ },
"tags": [
"challengify"
]
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " customer_id | \n",
+ " order_status | \n",
+ " order_purchase_timestamp | \n",
+ " order_approved_at | \n",
+ " order_delivered_carrier_date | \n",
+ " order_delivered_customer_date | \n",
+ " order_estimated_delivery_date | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " e481f51cbdc54678b7cc49136f2d6af7 | \n",
+ " 9ef432eb6251297304e76186b10a928d | \n",
+ " delivered | \n",
+ " 2017-10-02 10:56:33 | \n",
+ " 2017-10-02 11:07:15 | \n",
+ " 2017-10-04 19:55:00 | \n",
+ " 2017-10-10 21:25:13 | \n",
+ " 2017-10-18 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 53cdb2fc8bc7dce0b6741e2150273451 | \n",
+ " b0830fb4747a6c6d20dea0b8c802d7ef | \n",
+ " delivered | \n",
+ " 2018-07-24 20:41:37 | \n",
+ " 2018-07-26 03:24:27 | \n",
+ " 2018-07-26 14:31:00 | \n",
+ " 2018-08-07 15:27:45 | \n",
+ " 2018-08-13 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 47770eb9100c2d0c44946d9cf07ec65d | \n",
+ " 41ce2a54c0b03bf3443c3d931a367089 | \n",
+ " delivered | \n",
+ " 2018-08-08 08:38:49 | \n",
+ " 2018-08-08 08:55:23 | \n",
+ " 2018-08-08 13:50:00 | \n",
+ " 2018-08-17 18:06:29 | \n",
+ " 2018-09-04 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 949d5b44dbf5de918fe9c16f97b45f8a | \n",
+ " f88197465ea7920adcdbec7375364d82 | \n",
+ " delivered | \n",
+ " 2017-11-18 19:28:06 | \n",
+ " 2017-11-18 19:45:59 | \n",
+ " 2017-11-22 13:39:59 | \n",
+ " 2017-12-02 00:28:42 | \n",
+ " 2017-12-15 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ad21c59c0840e6cb83a9ceb5573f8159 | \n",
+ " 8ab97904e6daea8866dbdbc4fb7aad2c | \n",
+ " delivered | \n",
+ " 2018-02-13 21:18:39 | \n",
+ " 2018-02-13 22:20:29 | \n",
+ " 2018-02-14 19:46:34 | \n",
+ " 2018-02-16 18:17:02 | \n",
+ " 2018-02-26 00:00:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id customer_id \\\n",
+ "0 e481f51cbdc54678b7cc49136f2d6af7 9ef432eb6251297304e76186b10a928d \n",
+ "1 53cdb2fc8bc7dce0b6741e2150273451 b0830fb4747a6c6d20dea0b8c802d7ef \n",
+ "2 47770eb9100c2d0c44946d9cf07ec65d 41ce2a54c0b03bf3443c3d931a367089 \n",
+ "3 949d5b44dbf5de918fe9c16f97b45f8a f88197465ea7920adcdbec7375364d82 \n",
+ "4 ad21c59c0840e6cb83a9ceb5573f8159 8ab97904e6daea8866dbdbc4fb7aad2c \n",
+ "\n",
+ " order_status order_purchase_timestamp order_approved_at \\\n",
+ "0 delivered 2017-10-02 10:56:33 2017-10-02 11:07:15 \n",
+ "1 delivered 2018-07-24 20:41:37 2018-07-26 03:24:27 \n",
+ "2 delivered 2018-08-08 08:38:49 2018-08-08 08:55:23 \n",
+ "3 delivered 2017-11-18 19:28:06 2017-11-18 19:45:59 \n",
+ "4 delivered 2018-02-13 21:18:39 2018-02-13 22:20:29 \n",
+ "\n",
+ " order_delivered_carrier_date order_delivered_customer_date \\\n",
+ "0 2017-10-04 19:55:00 2017-10-10 21:25:13 \n",
+ "1 2018-07-26 14:31:00 2018-08-07 15:27:45 \n",
+ "2 2018-08-08 13:50:00 2018-08-17 18:06:29 \n",
+ "3 2017-11-22 13:39:59 2017-12-02 00:28:42 \n",
+ "4 2018-02-14 19:46:34 2018-02-16 18:17:02 \n",
+ "\n",
+ " order_estimated_delivery_date \n",
+ "0 2017-10-18 00:00:00 \n",
+ "1 2018-08-13 00:00:00 \n",
+ "2 2018-09-04 00:00:00 \n",
+ "3 2017-12-15 00:00:00 \n",
+ "4 2018-02-26 00:00:00 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# YOUR CODE HERE"
+ "data = {key: pd.read_csv(path) for key, path in zip(key_names, file_paths)}\n",
+ "\n",
+ "# Kontrol etmek için herhangi bir tablonun ilk 5 satırına bakalım\n",
+ "data['orders'].head()"
]
},
{
@@ -210,9 +647,105 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 10,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:23:58.746270Z",
+ "iopub.status.busy": "2026-03-16T14:23:58.745836Z",
+ "iopub.status.idle": "2026-03-16T14:24:00.715719Z",
+ "shell.execute_reply": "2026-03-16T14:24:00.714342Z",
+ "shell.execute_reply.started": "2026-03-16T14:23:58.746240Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " seller_id | \n",
+ " seller_zip_code_prefix | \n",
+ " seller_city | \n",
+ " seller_state | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 3442f8959a84dea7ee197c632cb2df15 | \n",
+ " 13023 | \n",
+ " campinas | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " d1b65fc7debc3361ea86b5f14c68d2e2 | \n",
+ " 13844 | \n",
+ " mogi guacu | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " ce3ad9de960102d0677a81f5d0bb7b2d | \n",
+ " 20031 | \n",
+ " rio de janeiro | \n",
+ " RJ | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " c0f3eea2e14555b6faeea3dd58c1b1c3 | \n",
+ " 4195 | \n",
+ " sao paulo | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 51a04a8a6bdcb23deccc82b0b80742cf | \n",
+ " 12914 | \n",
+ " braganca paulista | \n",
+ " SP | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " seller_id seller_zip_code_prefix \\\n",
+ "0 3442f8959a84dea7ee197c632cb2df15 13023 \n",
+ "1 d1b65fc7debc3361ea86b5f14c68d2e2 13844 \n",
+ "2 ce3ad9de960102d0677a81f5d0bb7b2d 20031 \n",
+ "3 c0f3eea2e14555b6faeea3dd58c1b1c3 4195 \n",
+ "4 51a04a8a6bdcb23deccc82b0b80742cf 12914 \n",
+ "\n",
+ " seller_city seller_state \n",
+ "0 campinas SP \n",
+ "1 mogi guacu SP \n",
+ "2 rio de janeiro RJ \n",
+ "3 sao paulo SP \n",
+ "4 braganca paulista SP "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from olist.data import Olist\n",
"Olist().get_data()['sellers'].head()"
@@ -227,9 +760,47 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 8,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-03-16T14:23:07.795316Z",
+ "iopub.status.busy": "2026-03-16T14:23:07.794497Z",
+ "iopub.status.idle": "2026-03-16T14:23:10.511655Z",
+ "shell.execute_reply": "2026-03-16T14:23:10.510455Z",
+ "shell.execute_reply.started": "2026-03-16T14:23:07.795273Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\u001b[1m============================= test session starts ==============================\u001b[0m\n",
+ "platform linux -- Python 3.12.9, pytest-8.3.4, pluggy-1.5.0 -- /home/bariscan/.pyenv/versions/3.12.9/envs/workintech/bin/python\n",
+ "cachedir: .pytest_cache\n",
+ "rootdir: /home/bariscan/data-data-preparation/tests\n",
+ "plugins: typeguard-4.4.2, anyio-4.8.0\n",
+ "\u001b[1mcollecting ... \u001b[0mcollected 3 items\n",
+ "\n",
+ "test_get_data.py::TestGetData::test_columns \u001b[32mPASSED\u001b[0m\u001b[32m [ 33%]\u001b[0m\n",
+ "test_get_data.py::TestGetData::test_keys \u001b[32mPASSED\u001b[0m\u001b[32m [ 66%]\u001b[0m\n",
+ "test_get_data.py::TestGetData::test_len \u001b[32mPASSED\u001b[0m\u001b[32m [100%]\u001b[0m\n",
+ "\n",
+ "\u001b[32m============================== \u001b[32m\u001b[1m3 passed\u001b[0m\u001b[32m in 0.02s\u001b[0m\u001b[32m ===============================\u001b[0m\n",
+ "\n",
+ "\n",
+ "💯 You can commit your code:\n",
+ "\n",
+ "\u001b[1;32mgit\u001b[39m add tests/get_data.pickle\n",
+ "\n",
+ "\u001b[32mgit\u001b[39m commit -m \u001b[33m'Completed get_data step'\u001b[39m\n",
+ "\n",
+ "\u001b[32mgit\u001b[39m push origin master\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"from nbresult import ChallengeResult\n",
"from olist.data import Olist\n",
@@ -269,9 +840,21 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.9"
}
},
"nbformat": 4,
diff --git a/olist/__init__.py b/olist/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/olist/data.py b/olist/data.py
new file mode 100644
index 0000000..5380a45
--- /dev/null
+++ b/olist/data.py
@@ -0,0 +1,34 @@
+import pandas as pd
+from pathlib import Path
+
+class Olist:
+    """Loads the Olist CSV files found under ``csv_path`` as DataFrames."""
+
+    def __init__(self):
+        # Same directory the notebook reads its CSV files from.
+        self.csv_path = Path("~/.workintech/olist/data/csv").expanduser()
+
+    def get_data(self):
+        """
+        Read every ``.csv`` file in ``csv_path`` into a dict of DataFrames.
+
+        Each key is the file name with every ``olist_``, ``_dataset.csv``
+        and ``.csv`` substring removed.
+
+        Raises:
+            FileNotFoundError: if ``csv_path`` does not exist.
+        """
+        if not self.csv_path.exists():
+            raise FileNotFoundError(f"Veri yolu bulunamadı: {self.csv_path}")
+
+        # Filter the paths themselves, not just the names: zipping filtered
+        # names with unfiltered paths would pair keys with the wrong files
+        # as soon as the directory holds a non-CSV entry.
+        csv_files = [p for p in self.csv_path.iterdir() if p.suffix == '.csv']
+
+        data = {}
+        for csv_file in csv_files:
+            key = csv_file.name.replace('olist_', '').replace('_dataset.csv', '')
+            data[key.replace('.csv', '')] = pd.read_csv(csv_file)
+
+        return data
diff --git a/tests/get_data.pickle b/tests/get_data.pickle
new file mode 100644
index 0000000..00d22d8
Binary files /dev/null and b/tests/get_data.pickle differ