Big Data with Hadoop

Practical 7:

Analyze Weather Data for Sunny or Cool Days using MapReduce

Objective:

Implement a MapReduce program that processes daily weather temperature data, classifies each day as "Sunny" (temperature of 25 or above), "Cool" (temperature of 10 or below), or "Moderate" (anything in between), and outputs the date along with its classification.


Step 1: Input Data Preparation

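Create weather_data.txt with one record per line in the format YYYY-MM-DD,Temperature (the temperature must be a whole number; records that do not match are skipped by the Mapper):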

2023-01-01,5
2023-01-02,8
2023-06-15,28
2023-06-16,31
2023-09-01,18
2023-09-02,22
2024-02-10,-2
2024-07-20,35

Upload to HDFS:

# -p: do not fail if /weather_input already exists (makes this step re-runnable)
hdfs dfs -mkdir -p /weather_input
# -f: overwrite a previously uploaded copy instead of failing with "File exists"
hdfs dfs -put -f weather_data.txt /weather_input/weather_data.txt

Step 2: Mapper Class

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
 
public class WeatherClassifierMapper 
        extends Mapper<LongWritable, Text, Text, Text> {
 
    private Text date = new Text();
    private Text classification = new Text();
 
    @Override
    public void map(LongWritable key, Text value, Context context) 
            throws IOException, InterruptedException {
 
        String line = value.toString();
        String[] parts = line.split(",");
 
        if (parts.length == 2) {
            try {
                String dateString = parts[0].trim();
                int temperature = Integer.parseInt(parts[1].trim());
 
                date.set(dateString);
 
                if (temperature <= 10) {
                    classification.set("Cool Day");
                } else if (temperature >= 25) {
                    classification.set("Sunny Day");
                } else {
                    classification.set("Moderate Day");
                }
 
                context.write(date, classification);
 
            } catch (NumberFormatException e) {
                System.err.println("Skipping record with invalid temperature: " + line);
            }
        } else {
            System.err.println("Skipping malformed record: " + line);
        }
    }
}

Step 3: Reducer Class (Pass-Through Reducer: One Classification per Date)

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
 
public class WeatherClassifierReducer 
        extends Reducer<Text, Text, Text, Text> {
 
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text val : values) {
            context.write(key, val); // Emit (date, classification)
            break; // Only one value expected per date
        }
    }
}

Step 4: Driver Class

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
 
public class WeatherClassifierDriver {
 
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
 
        if (otherArgs.length < 2) {
            System.err.println("Usage: weatherclassifier <input> <output>");
            System.exit(2);
        }
 
        Job job = Job.getInstance(conf, "Weather Classifier");
        job.setJarByClass(WeatherClassifierDriver.class);
        job.setMapperClass(WeatherClassifierMapper.class);
        job.setReducerClass(WeatherClassifierReducer.class);
 
        // Mapper output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
 
        // Final output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
 
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
 
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Step 5: Compile & Package

# Compile the three classes against the Hadoop classpath into ./classes,
# then bundle the compiled classes into weatherclassifier.jar
mkdir -p classes
javac -cp "$(hadoop classpath)" -d classes \
    WeatherClassifierMapper.java \
    WeatherClassifierReducer.java \
    WeatherClassifierDriver.java
jar -cvf weatherclassifier.jar -C classes/ .

Step 6: Run the Job on Hadoop

# Remove output left by a previous run (the job refuses to start if the output
# directory already exists); -f suppresses the error when the directory is not
# there yet, e.g. on the very first run
hdfs dfs -rm -r -f /weather_output
hadoop jar weatherclassifier.jar WeatherClassifierDriver /weather_input/weather_data.txt /weather_output

Step 7: Verify the Output

# Print the first output part file that the job wrote under /weather_output
hdfs dfs -cat /weather_output/part-r-00000

Expected Output (in the actual file, the date and its classification are separated by a tab character):

2023-01-01  Cool Day
2023-01-02  Cool Day
2023-06-15  Sunny Day
2023-06-16  Sunny Day
2023-09-01  Moderate Day
2023-09-02  Moderate Day
2024-02-10  Cool Day
2024-07-20  Sunny Day

Conclusion:

You have successfully implemented a MapReduce program to classify daily temperatures into "Sunny", "Cool", or "Moderate". This practical demonstrates:

- Applying conditional logic in MapReduce

- Parsing and transforming input data

- Using a pass-through Reducer (one record emitted per date) when the Mapper output is already final