diff --git a/plot1.R b/plot1.R
new file mode 100644
index 00000000000..6a0725282e8
--- /dev/null
+++ b/plot1.R
@@ -0,0 +1,58 @@
+# plot1.R: histogram of Global Active Power for 2007-02-01 and 2007-02-02.
+# Downloads the zipped data set, reads it and writes plot1.png to the
+# working directory (assuming working directory is set).
+
+# What's my available memory?
+# 'systeminfo' is a Windows-only command, so it is only run there.
+if (.Platform$OS.type == "windows") {
+  system("systeminfo")
+}
+
+# Download the data
+# The file is zipped, so it is saved to a temp file and read through unz().
+# Found help here: http://stackoverflow.com/questions/3053833/using-r-to-download-zipped-data-file-extract-and-import-data
+url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
+data_file <- "household_power_consumption.txt"
+temp <- tempfile(fileext = ".zip")
+download.file(url, temp, mode = "wb") # binary mode keeps the archive intact
+
+# How much memory does the file require?
+# object.size(url) measures the `url` object rather than the data, so report
+# the uncompressed size (in bytes) of the data file inside the archive instead.
+zip_listing <- unzip(temp, list = TRUE)
+print(zip_listing[zip_listing$Name == data_file, c("Name", "Length")])
+
+# Read in the data
+mydata <- read.table(
+  unz(temp, data_file),
+  header = TRUE,
+  sep = ";",
+  col.names = c(
+    "Date", "Time", "Global_active_power", "Global_reactive_power",
+    "Voltage", "Global_intensity", "Sub_metering_1", "Sub_metering_2",
+    "Sub_metering_3"
+  ),
+  stringsAsFactors = FALSE
+)
+unlink(temp) # the download is not needed once the data is in memory
+
+# Fix the class of the Date variable
+mydata$Date <- as.Date(mydata$Date, format = "%d/%m/%Y")
+
+# Subset the data to the only 2 dates we're interested in
+# %in% never yields NA, so unparsed dates cannot add all-NA rows.
+wanted_dates <- as.Date(c("2007-02-01", "2007-02-02"))
+mydata_subset <- mydata[mydata$Date %in% wanted_dates, ]
+
+# Plot the data
+# The column may be read as text, so convert it to numeric first.
+mydata_subset$Global_active_power <-
+  as.numeric(mydata_subset$Global_active_power)
+png("plot1.png")
+hist(
+  mydata_subset$Global_active_power,
+  col = "red",
+  main = "Global Active Power",
+  xlab = "Global Active Power (kilowatts)"
+)
+dev.off()
diff --git a/plot1.png b/plot1.png
new file mode 100644
index 00000000000..ed79d23e473
Binary files /dev/null and b/plot1.png differ
diff --git a/plot2.R b/plot2.R
new file mode 100644
index 00000000000..9680eb3bd18
--- /dev/null
+++ b/plot2.R
@@ -0,0 +1,66 @@
+# plot2.R: line plot of Global Active Power over 2007-02-01 and 2007-02-02.
+# Downloads the zipped data set, reads it and writes plot2.png to the
+# working directory (assuming working directory is set).
+
+# What's my available memory?
+# 'systeminfo' is a Windows-only command, so it is only run there.
+if (.Platform$OS.type == "windows") {
+  system("systeminfo")
+}
+
+# Download the data
+# The file is zipped, so it is saved to a temp file and read through unz().
+# Found help here: http://stackoverflow.com/questions/3053833/using-r-to-download-zipped-data-file-extract-and-import-data
+url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
+data_file <- "household_power_consumption.txt"
+temp <- tempfile(fileext = ".zip")
+download.file(url, temp, mode = "wb") # binary mode keeps the archive intact
+
+# How much memory does the file require?
+# object.size(url) measures the `url` object rather than the data, so report
+# the uncompressed size (in bytes) of the data file inside the archive instead.
+zip_listing <- unzip(temp, list = TRUE)
+print(zip_listing[zip_listing$Name == data_file, c("Name", "Length")])
+
+# Read in the data
+mydata <- read.table(
+  unz(temp, data_file),
+  header = TRUE,
+  sep = ";",
+  col.names = c(
+    "Date", "Time", "Global_active_power", "Global_reactive_power",
+    "Voltage", "Global_intensity", "Sub_metering_1", "Sub_metering_2",
+    "Sub_metering_3"
+  ),
+  stringsAsFactors = FALSE
+)
+unlink(temp) # the download is not needed once the data is in memory
+
+# Fix the class of the Date variable
+mydata$Date <- as.Date(mydata$Date, format = "%d/%m/%Y")
+
+# Subset the data to the only 2 dates we're interested in
+# %in% never yields NA, so unparsed dates cannot add all-NA rows.
+wanted_dates <- as.Date(c("2007-02-01", "2007-02-02"))
+mydata_subset <- mydata[mydata$Date %in% wanted_dates, ]
+
+# Plot the data
+# First "merge" the Date and Time columns into one date/time variable.
+# POSIXct is used because strptime() returns the list-based POSIXlt class.
+mydata_subset$datetime <- as.POSIXct(
+  paste(mydata_subset$Date, mydata_subset$Time),
+  format = "%Y-%m-%d %H:%M:%S"
+)
+# The column may be read as text, so convert it to numeric before plotting.
+mydata_subset$Global_active_power <-
+  as.numeric(mydata_subset$Global_active_power)
+# Then plot
+png("plot2.png")
+plot(
+  mydata_subset$datetime,
+  mydata_subset$Global_active_power,
+  type = "l",
+  xlab = "",
+  ylab = "Global Active Power (kilowatts)"
+)
+dev.off()
diff --git a/plot2.png b/plot2.png
new file mode 100644
index 00000000000..08381f8befb
Binary files /dev/null and b/plot2.png differ
diff --git a/plot3.R b/plot3.R
new file mode 100644
index 00000000000..f3e658fda37
--- /dev/null
+++ b/plot3.R
@@ -0,0 +1,79 @@
+# plot3.R: the three energy sub-metering series over 2007-02-01 and 2007-02-02.
+# Downloads the zipped data set, reads it and writes plot3.png to the
+# working directory (assuming working directory is set).
+
+# What's my available memory?
+# 'systeminfo' is a Windows-only command, so it is only run there.
+if (.Platform$OS.type == "windows") {
+  system("systeminfo")
+}
+
+# Download the data
+# The file is zipped, so it is saved to a temp file and read through unz().
+# Found help here: http://stackoverflow.com/questions/3053833/using-r-to-download-zipped-data-file-extract-and-import-data
+url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
+data_file <- "household_power_consumption.txt"
+temp <- tempfile(fileext = ".zip")
+download.file(url, temp, mode = "wb") # binary mode keeps the archive intact
+
+# How much memory does the file require?
+# object.size(url) measures the `url` object rather than the data, so report
+# the uncompressed size (in bytes) of the data file inside the archive instead.
+zip_listing <- unzip(temp, list = TRUE)
+print(zip_listing[zip_listing$Name == data_file, c("Name", "Length")])
+
+# Read in the data
+mydata <- read.table(
+  unz(temp, data_file),
+  header = TRUE,
+  sep = ";",
+  col.names = c(
+    "Date", "Time", "Global_active_power", "Global_reactive_power",
+    "Voltage", "Global_intensity", "Sub_metering_1", "Sub_metering_2",
+    "Sub_metering_3"
+  ),
+  stringsAsFactors = FALSE
+)
+unlink(temp) # the download is not needed once the data is in memory
+
+# Fix the class of the Date variable
+mydata$Date <- as.Date(mydata$Date, format = "%d/%m/%Y")
+
+# Subset the data to the only 2 dates we're interested in
+# %in% never yields NA, so unparsed dates cannot add all-NA rows.
+wanted_dates <- as.Date(c("2007-02-01", "2007-02-02"))
+mydata_subset <- mydata[mydata$Date %in% wanted_dates, ]
+
+# Plot the data
+# First "merge" the Date and Time columns into one date/time variable.
+# POSIXct is used because strptime() returns the list-based POSIXlt class.
+mydata_subset$datetime <- as.POSIXct(
+  paste(mydata_subset$Date, mydata_subset$Time),
+  format = "%Y-%m-%d %H:%M:%S"
+)
+# Then get the plotted variables into numeric form
+sub_cols <- c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3")
+mydata_subset[sub_cols] <- lapply(mydata_subset[sub_cols], as.numeric)
+
+# Then plot
+png("plot3.png")
+# ylim was found by manually checking the min and max of each y data set.
+plot(
+  mydata_subset$datetime,
+  mydata_subset$Sub_metering_1,
+  type = "l",
+  xlab = "",
+  ylab = "Energy sub metering",
+  ylim = c(-1, 39),
+  yaxt = "n"
+)
+axis(side = 2, at = c(0, 10, 20, 30))
+lines(mydata_subset$datetime, mydata_subset$Sub_metering_2, col = "red")
+lines(mydata_subset$datetime, mydata_subset$Sub_metering_3, col = "blue")
+legend(
+  "topright",
+  legend = sub_cols,
+  col = c("black", "red", "blue"),
+  lty = 1
+)
+dev.off()
diff --git a/plot3.png b/plot3.png
new file mode 100644
index 00000000000..252ed4442a1
Binary files /dev/null and b/plot3.png differ
diff --git a/plot4.R b/plot4.R
new file mode 100644
index 00000000000..fd1ff8776bf
--- /dev/null
+++ b/plot4.R
@@ -0,0 +1,115 @@
+# plot4.R: 2 x 2 panel of power, voltage and sub-metering plots for
+# 2007-02-01 and 2007-02-02. Downloads the zipped data set, reads it and
+# writes plot4.png to the working directory (assuming working directory is set).
+
+# What's my available memory?
+# 'systeminfo' is a Windows-only command, so it is only run there.
+if (.Platform$OS.type == "windows") {
+  system("systeminfo")
+}
+
+# Download the data
+# The file is zipped, so it is saved to a temp file and read through unz().
+# Found help here: http://stackoverflow.com/questions/3053833/using-r-to-download-zipped-data-file-extract-and-import-data
+url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
+data_file <- "household_power_consumption.txt"
+temp <- tempfile(fileext = ".zip")
+download.file(url, temp, mode = "wb") # binary mode keeps the archive intact
+
+# How much memory does the file require?
+# object.size(url) measures the `url` object rather than the data, so report
+# the uncompressed size (in bytes) of the data file inside the archive instead.
+zip_listing <- unzip(temp, list = TRUE)
+print(zip_listing[zip_listing$Name == data_file, c("Name", "Length")])
+
+# Read in the data
+mydata <- read.table(
+  unz(temp, data_file),
+  header = TRUE,
+  sep = ";",
+  col.names = c(
+    "Date", "Time", "Global_active_power", "Global_reactive_power",
+    "Voltage", "Global_intensity", "Sub_metering_1", "Sub_metering_2",
+    "Sub_metering_3"
+  ),
+  stringsAsFactors = FALSE
+)
+unlink(temp) # the download is not needed once the data is in memory
+
+# Fix the class of the Date variable
+mydata$Date <- as.Date(mydata$Date, format = "%d/%m/%Y")
+
+# Subset the data to the only 2 dates we're interested in
+# %in% never yields NA, so unparsed dates cannot add all-NA rows.
+wanted_dates <- as.Date(c("2007-02-01", "2007-02-02"))
+mydata_subset <- mydata[mydata$Date %in% wanted_dates, ]
+
+# Plot the data
+# First "merge" the Date and Time columns into one date/time variable.
+# POSIXct is used because strptime() returns the list-based POSIXlt class.
+mydata_subset$datetime <- as.POSIXct(
+  paste(mydata_subset$Date, mydata_subset$Time),
+  format = "%Y-%m-%d %H:%M:%S"
+)
+# Then get every plotted variable into numeric form; a column left
+# unconverted would not give the y-axis units of the example.
+sub_cols <- c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3")
+plot_cols <- c(
+  "Global_active_power", "Global_reactive_power", "Voltage", sub_cols
+)
+mydata_subset[plot_cols] <- lapply(mydata_subset[plot_cols], as.numeric)
+
+# Then plot
+png("plot4.png")
+par(mfrow = c(2, 2), mar = c(5, 4, 2, 1))
+
+# Plot 4a (=Plot2)
+plot(
+  mydata_subset$datetime,
+  mydata_subset$Global_active_power,
+  type = "l",
+  xlab = "",
+  ylab = "Global Active Power"
+)
+
+# Plot 4b (new)
+plot(
+  mydata_subset$datetime,
+  mydata_subset$Voltage,
+  type = "l",
+  xlab = "datetime",
+  ylab = "Voltage"
+)
+
+# Plot 4c (=Plot3)
+# ylim was found by manually checking the min and max of each y data set.
+plot(
+  mydata_subset$datetime,
+  mydata_subset$Sub_metering_1,
+  type = "l",
+  xlab = "",
+  ylab = "Energy sub metering",
+  ylim = c(-1, 39),
+  yaxt = "n"
+)
+axis(side = 2, at = c(0, 10, 20, 30))
+lines(mydata_subset$datetime, mydata_subset$Sub_metering_2, col = "red")
+lines(mydata_subset$datetime, mydata_subset$Sub_metering_3, col = "blue")
+legend(
+  "topright",
+  legend = sub_cols,
+  col = c("black", "red", "blue"),
+  lty = 1,
+  cex = 0.7
+)
+
+# Plot 4d (new)
+plot(
+  mydata_subset$datetime,
+  mydata_subset$Global_reactive_power,
+  type = "l",
+  xlab = "",
+  ylab = "Global_reactive_power"
+)
+
+dev.off()
diff --git a/plot4.png b/plot4.png
new file mode 100644
index 00000000000..cd63b8318c3
Binary files /dev/null and b/plot4.png differ