# indoorlocalization.py

# -*- coding: utf-8 -*-
"""IndoorLocalization.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1oLj9P4SwfepSuzOrjSIsajrwm-wuRDZo
"""

'''
'OUTSIDE'- 0
'INSIDE' - 1
'IN_VESTIBULE' - 2
'''

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

from google.colab import drive
# Mount Google Drive so that files stored there can be read from this runtime.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Alternative dataset kept for reference:
#de = pd.read_csv("drive/My Drive/iBeacon_RSSI_Labeled.csv")
DATASET_CSV = "drive/My Drive/IndoorLocal/out.csv"
df = pd.read_csv(DATASET_CSV)

# Notebook-style inspection of the first rows and the (rows, columns) size.
df.head()

df.shape

# Inspect rows that are exact repeats of an earlier row.
dups = df.duplicated()
df[dups]

# keep='first' retains one copy of every repeated row. keep=False would
# discard *all* copies, silently throwing away legitimate samples.
df.drop_duplicates(keep='first', inplace=True)

df.shape

# Missing-value count per column before imputation.
df.isnull().sum()

# First row of DataFrame.mode() holds the most frequent value of each column.
column_modes = df.mode().iloc[0]

#Replace null values with the corresponding column's mode.
df.fillna(column_modes, inplace=True)

# Missing-value count per column after imputation.
df.isnull().sum()

df.dtypes

# Convert the two RSSI columns and the timestamp to pandas' nullable integer
# dtype. errors='coerce' turns unparsable cells into missing values instead of
# raising.
for numeric_column in ('rssiOne', 'rssiTwo', 'timestamp'):
    parsed_values = pd.to_numeric(df[numeric_column], errors='coerce')
    df[numeric_column] = parsed_values.astype('Int64')

df.dtypes

df['locationStatus'].unique()

# Rows whose 'locationStatus' cell contains the literal text 'locationStatus'
# are presumably header lines repeated inside the CSV -- TODO confirm against
# how out.csv is produced.
column_to_check = 'locationStatus'
target_value = 'locationStatus'

count_of_occurrences = df[df[column_to_check] == target_value].shape[0]
print(count_of_occurrences)

# Delete rows where the value in 'column_to_check' matches 'target_value'.
# .copy() detaches the result from the original frame so that assigning to its
# columns afterwards does not raise SettingWithCopyWarning.
df = df[df[column_to_check] != target_value].copy()

df.shape

df['locationStatus'].unique()

# Integer codes for the target classes.
location_status_codes = {'OUTSIDE': 0, 'INSIDE': 1, 'IN_VESTIBULE': 2}

# Encode all labels in a single pass and cast explicitly to int64, rather than
# relying on pandas silently downcasting an object column after replace().
# Values that are not keys of the mapping pass through unchanged, so re-running
# this cell on already-encoded data is harmless; astype raises ValueError if an
# unexpected non-numeric label is present.
df['locationStatus'] = (
    df['locationStatus']
    .map(lambda status: location_status_codes.get(status, status))
    .astype('int64')
)

df.dtypes

# Call head() -- the bare attribute `df.head` only references the method.
df.head()

df['name'].unique()

#VISUALIZING and checking outliers
import matplotlib.pyplot as plt
import plotly.express as px

# First pass draws one histogram per column (value on the x axis); second pass
# draws one box plot per column (value on the y axis) to expose outliers.
for make_figure, axis_keyword in ((px.histogram, 'x'), (px.box, 'y')):
    for column_name in df.columns:
        figure = make_figure(df, **{axis_keyword: column_name})
        figure.show()

import re

# Remove every character other than letters, digits and underscores from the
# column labels.
_disallowed_label_chars = re.compile('[^A-Za-z0-9_]+')
df = df.rename(columns=lambda label: _disallowed_label_chars.sub('', label))
df.head()

# Feature matrix: every column except the first one and the target column.
# Target vector: the 'locationStatus' column.
columns_after_first = df.iloc[:, 1:]
X = columns_after_first.drop(columns=['locationStatus'])
y = df['locationStatus']

X.shape

y.shape

# Split the dataset: 20% of the rows are held out for testing and
# random_state fixes the shuffle so the split is reproducible.

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

titled_parts = [
    ('X_train : ', X_train),
    ('X_test : ', X_test),
    ('y_train : ', y_train),
    ('y_test : ', y_test),
]

# Preview of each part, with a blank line between consecutive parts.
for position, (title, part) in enumerate(titled_parts):
    if position > 0:
        print('')
    print(title)
    print(part.head())

# Dimensions of each part, laid out the same way.
for position, (title, part) in enumerate(titled_parts):
    if position > 0:
        print('')
    print(title)
    print(part.shape)

#RANDOM FOREST CLASSIFIER

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import math

# 500 trees; random_state makes the fitted forest reproducible.
rf = RandomForestClassifier(random_state=0, n_estimators=500)
rf.fit(X_train, y_train)

# Given values
Y_true = y_test  # Y_true = Y (original values)

# Predict once and reuse the result for both the printout and the metrics,
# instead of running the 500-tree forest over X_test twice.
Y_pred = rf.predict(X_test)  # Y_pred = Y'
print(Y_pred)

# Error metrics are computed on the integer class codes, so they measure the
# distance between codes rather than a misclassification rate.
mse = mean_squared_error(Y_true, Y_pred)
rmse = math.sqrt(mse)
print(mse)
print("The difference between actual and predicted values", rmse)
print(mean_absolute_error(Y_true, Y_pred))

#GRADIENT TREE BOOSTING CLASSIFIER

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import math

gb = GradientBoostingClassifier(learning_rate=0.01, n_estimators=500)
gb.fit(X_train, y_train)

# Given values
Y_true = y_test  # Y_true = Y (original values)

# Predict once and reuse the result for both the printout and the metrics,
# instead of evaluating the 500-stage ensemble on X_test twice.
Y_pred = gb.predict(X_test)  # Y_pred = Y'
print(Y_pred)

# Error metrics are computed on the integer class codes, so they measure the
# distance between codes rather than a misclassification rate.
mse = mean_squared_error(Y_true, Y_pred)
rmse = math.sqrt(mse)
print(mse)
print("The difference between actual and predicted values", rmse)
print(mean_absolute_error(Y_true, Y_pred))

#KNN CLASSIFIER

import math

import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Instantiate and train the classifier on the TRAINING split. Fitting on
# X_test / y_test and then scoring on the same rows leaks the answers into the
# model and makes every metric below meaningless.
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(X_train, y_train)

# Check the results on the held-out split using metrics.
y_pred = clf.predict(X_test)
print(y_pred)
# confusion_matrix expects (y_true, y_pred): rows are true classes, columns
# are predicted classes.
print(metrics.confusion_matrix(y_test, y_pred))

# Given values
Y_true = y_test  # Y_true = Y (original values)

# calculated values (same predictions as above, no second inference pass)
Y_pred = y_pred  # Y_pred = Y'

# Error metrics are computed on the integer class codes.
mse = mean_squared_error(Y_true, Y_pred)
rmse = math.sqrt(mse)
print(mse)
print("The difference between actual and predicted values", rmse)
print(mean_absolute_error(y_test, y_pred))

#LOGISTIC REGRESSION CLASSIFIER

logreg_classifier = LogisticRegression()
logreg_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = logreg_classifier.predict(X_test)

print(y_pred)
# Compare against y_test explicitly instead of a module-level `y` alias, and
# pass the arguments in (y_true, y_pred) order.
print(metrics.confusion_matrix(y_test, y_pred))

# Given values
Y_true = y_test  # Y_true = Y (original values)

# calculated values (same predictions as above, no second inference pass)
Y_pred = y_pred  # Y_pred = Y'

# Error metrics are computed on the integer class codes.
mse = mean_squared_error(Y_true, Y_pred)
rmse = math.sqrt(mse)
print(mse)
print("The difference between actual and predicted values", rmse)
print(mean_absolute_error(y_test, y_pred))

# Which classes the model actually predicted.
np.unique(y_pred)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print a classification report with precision, recall, and F1-score
print(classification_report(y_test, y_pred))

#SUPPORT VECTOR MACHINE CLASSIFIER

import math

from sklearn import metrics
from sklearn import svm
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

# `degree` only applies to the 'poly' kernel and is ignored by 'rbf', so it is
# not passed here; the fitted model is unchanged.
model = svm.SVC(kernel='rbf', C=10)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(y_pred)
# confusion_matrix expects (y_true, y_pred): rows are true classes, columns
# are predicted classes.
print(metrics.confusion_matrix(y_test, y_pred))

# Which classes the model actually predicted.
np.unique(y_pred)

# Given values
Y_true = y_test  # Y_true = Y (original values)

# calculated values (same predictions as above, no second inference pass)
Y_pred = y_pred  # Y_pred = Y'

# Error metrics are computed on the integer class codes.
mse = mean_squared_error(Y_true, Y_pred)
rmse = math.sqrt(mse)
print(mse)
print("The difference between actual and predicted values", rmse)
print(mean_absolute_error(y_test, y_pred))