From 1caab760ab2ac48c150bc808e2d4ad313eeeca25 Mon Sep 17 00:00:00 2001 From: Nischal Krishna Singh <85799201+Nomadic43@users.noreply.github.com> Date: Tue, 16 May 2023 12:56:51 +0530 Subject: [PATCH] Add files via upload predicting car prices by converting strings into integer using one-hot encoding or dummy variables --- ...ce prediction_using dummies variable.ipynb | 1032 +++++++++++++++++ carprices.csv | 14 + 2 files changed, 1046 insertions(+) create mode 100644 car price prediction_using dummies variable.ipynb create mode 100644 carprices.csv diff --git a/car price prediction_using dummies variable.ipynb b/car price prediction_using dummies variable.ipynb new file mode 100644 index 0000000..fabf6f7 --- /dev/null +++ b/car price prediction_using dummies variable.ipynb @@ -0,0 +1,1032 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 50, + "id": "8629ead4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CarModelMileagePriceAge(yrs)
0BMW X569000180006
1BMW X535000340003
2BMW X557000261005
3BMW X522500400002
4BMW X546000315004
5Audi A559000294005
6Audi A552000320005
7Audi A572000193006
8Audi A591000120008
9Mercedez Benz C class67000220006
10Mercedez Benz C class83000200007
11Mercedez Benz C class79000210007
12Mercedez Benz C class59000330005
\n", + "
" + ], + "text/plain": [ + " CarModel Mileage Price Age(yrs)\n", + "0 BMW X5 69000 18000 6\n", + "1 BMW X5 35000 34000 3\n", + "2 BMW X5 57000 26100 5\n", + "3 BMW X5 22500 40000 2\n", + "4 BMW X5 46000 31500 4\n", + "5 Audi A5 59000 29400 5\n", + "6 Audi A5 52000 32000 5\n", + "7 Audi A5 72000 19300 6\n", + "8 Audi A5 91000 12000 8\n", + "9 Mercedez Benz C class 67000 22000 6\n", + "10 Mercedez Benz C class 83000 20000 7\n", + "11 Mercedez Benz C class 79000 21000 7\n", + "12 Mercedez Benz C class 59000 33000 5" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv('carprices.csv')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "df875dcc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['CarModel', 'Mileage', 'Price', 'Age(yrs)'], dtype='object')" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "fca236b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Audi A5BMW X5Mercedez Benz C class
0010
1010
2010
3010
4010
5100
6100
7100
8100
9001
10001
11001
12001
\n", + "
" + ], + "text/plain": [ + " Audi A5 BMW X5 Mercedez Benz C class\n", + "0 0 1 0\n", + "1 0 1 0\n", + "2 0 1 0\n", + "3 0 1 0\n", + "4 0 1 0\n", + "5 1 0 0\n", + "6 1 0 0\n", + "7 1 0 0\n", + "8 1 0 0\n", + "9 0 0 1\n", + "10 0 0 1\n", + "11 0 0 1\n", + "12 0 0 1" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dummies = pd.get_dummies(df.CarModel)\n", + "dummies" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "5398fb37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CarModelMileagePriceAge(yrs)Audi A5BMW X5Mercedez Benz C class
0BMW X569000180006010
1BMW X535000340003010
2BMW X557000261005010
3BMW X522500400002010
4BMW X546000315004010
5Audi A559000294005100
6Audi A552000320005100
7Audi A572000193006100
8Audi A591000120008100
9Mercedez Benz C class67000220006001
10Mercedez Benz C class83000200007001
11Mercedez Benz C class79000210007001
12Mercedez Benz C class59000330005001
\n", + "
" + ], + "text/plain": [ + " CarModel Mileage Price Age(yrs) Audi A5 BMW X5 \\\n", + "0 BMW X5 69000 18000 6 0 1 \n", + "1 BMW X5 35000 34000 3 0 1 \n", + "2 BMW X5 57000 26100 5 0 1 \n", + "3 BMW X5 22500 40000 2 0 1 \n", + "4 BMW X5 46000 31500 4 0 1 \n", + "5 Audi A5 59000 29400 5 1 0 \n", + "6 Audi A5 52000 32000 5 1 0 \n", + "7 Audi A5 72000 19300 6 1 0 \n", + "8 Audi A5 91000 12000 8 1 0 \n", + "9 Mercedez Benz C class 67000 22000 6 0 0 \n", + "10 Mercedez Benz C class 83000 20000 7 0 0 \n", + "11 Mercedez Benz C class 79000 21000 7 0 0 \n", + "12 Mercedez Benz C class 59000 33000 5 0 0 \n", + "\n", + " Mercedez Benz C class \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "5 0 \n", + "6 0 \n", + "7 0 \n", + "8 0 \n", + "9 1 \n", + "10 1 \n", + "11 1 \n", + "12 1 " + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged = pd.concat([df,dummies], axis='columns')\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "edf17876", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MileagePriceAge(yrs)BMW X5Mercedez Benz C class
06900018000610
13500034000310
25700026100510
32250040000210
44600031500410
55900029400500
65200032000500
77200019300600
89100012000800
96700022000601
108300020000701
117900021000701
125900033000501
\n", + "
" + ], + "text/plain": [ + " Mileage Price Age(yrs) BMW X5 Mercedez Benz C class\n", + "0 69000 18000 6 1 0\n", + "1 35000 34000 3 1 0\n", + "2 57000 26100 5 1 0\n", + "3 22500 40000 2 1 0\n", + "4 46000 31500 4 1 0\n", + "5 59000 29400 5 0 0\n", + "6 52000 32000 5 0 0\n", + "7 72000 19300 6 0 0\n", + "8 91000 12000 8 0 0\n", + "9 67000 22000 6 0 1\n", + "10 83000 20000 7 0 1\n", + "11 79000 21000 7 0 1\n", + "12 59000 33000 5 0 1" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final = merged.drop(['CarModel', 'Audi A5'], axis='columns')\n", + "final" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "c0b7d310", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "model = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "72e618ec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MileageAge(yrs)BMW X5Mercedez Benz C class
069000610
135000310
257000510
322500210
446000410
559000500
652000500
772000600
891000800
967000601
1083000701
1179000701
1259000501
\n", + "
" + ], + "text/plain": [ + " Mileage Age(yrs) BMW X5 Mercedez Benz C class\n", + "0 69000 6 1 0\n", + "1 35000 3 1 0\n", + "2 57000 5 1 0\n", + "3 22500 2 1 0\n", + "4 46000 4 1 0\n", + "5 59000 5 0 0\n", + "6 52000 5 0 0\n", + "7 72000 6 0 0\n", + "8 91000 8 0 0\n", + "9 67000 6 0 1\n", + "10 83000 7 0 1\n", + "11 79000 7 0 1\n", + "12 59000 5 0 1" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = final.drop('Price', axis = 'columns')\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "8354593a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 18000\n", + "1 34000\n", + "2 26100\n", + "3 40000\n", + "4 31500\n", + "5 29400\n", + "6 32000\n", + "7 19300\n", + "8 12000\n", + "9 22000\n", + "10 20000\n", + "11 21000\n", + "12 33000\n", + "Name: Price, dtype: int64" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = final.Price\n", + "y" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "5b9f5af3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(X,y)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "c7346d3f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\nisch\\anaconda3\\lib\\site-packages\\sklearn\\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "array([36991.31721061])" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict([[45000, 4, 0, 1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "071dd3f9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\nisch\\anaconda3\\lib\\site-packages\\sklearn\\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "array([11080.74313219])" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict([[86000, 7, 1, 0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "6b5b3d51", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9417050937281083" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(X, y)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/carprices.csv b/carprices.csv new file mode 100644 index 0000000..47dab6d --- /dev/null +++ b/carprices.csv @@ -0,0 +1,14 @@ +CarModel,Mileage,Price,Age(yrs) +BMW X5,69000,18000,6 +BMW X5,35000,34000,3 +BMW X5,57000,26100,5 +BMW X5,22500,40000,2 +BMW X5,46000,31500,4 +Audi A5,59000,29400,5 +Audi A5,52000,32000,5 +Audi A5,72000,19300,6 +Audi A5,91000,12000,8 +Mercedez Benz C class,67000,22000,6 +Mercedez Benz C class,83000,20000,7 +Mercedez Benz C class,79000,21000,7 +Mercedez Benz C class,59000,33000,5