// This file demonstrates reading Movie Ratings Data
// from GroupLens Research Project at the University of Minnesota
// SOURCE: http://www.grouplens.org/

/* From their README file:

u.data     -- The full u data set, 100000 ratings by 943 users on 1682 items.
              Each user has rated at least 20 movies.  Users and items are
              numbered consecutively from 1.  The data is randomly
              ordered. This is a tab separated list of 
                   user id | item id | rating | timestamp.
              The time stamps are unix seconds since 1/1/1970 UTC 
*/

// GOAL: read in data, find average rating for each film
//       and number of ratings for each user, store results
//       in two different files
//
//  -- M. Branicky, 10/11/06, 10/08/07

import java.util.Scanner;   // needed for input processing using Scanner
import java.io.PrintWriter; // needed for output processing using PrintWriter
import java.io.File;        // needed to open files

public class GLRatings {
  public static void main(String args[]) throws Exception {
    ///////////////////// INPUT AND PROCESSING ///////////////////////

    // Open the file
    // NOTE: this is the full path on my computer, you must download the
    // file yourself (on Code Repository) and change this name appropriately
    File infile = new File("/Users/msb/Downloads/ml-data/u.data");
    Scanner input = new Scanner( infile );

    // Get input from file
    /*  FORMAT IS
	     user id | item id | rating | timestamp. 
        There are 943 users and 1682 movies
    */
    int userID, itemID, rating, timestamp;
    int userN [] = new int [944];
    int itemSum [] = new int [1683];
    int itemN [] = new int [1683];

    for (int i=1; i<=943; i++) {
      userN[i]=0;
    }
    for (int i=2; i<=1682; i++) {
      itemN[i]=0;
      itemSum[i]=0;
    }

    while (input.hasNext()) {
      // read a line of the data
      userID = input.nextInt();
      itemID = input.nextInt();
      rating = input.nextInt();
      timestamp = input.nextInt();

      // update the variable of interest
      userN[userID]++;
      itemN[itemID]++;
      itemSum[itemID]+=rating;
    }

    // Close the input file
    input.close();

    ///////////////////// OUTPUT ///////////////////////

    // Open the files
    File outfile1 = new File("userN.txt");
    File outfile2 = new File("itemAvg.txt");
    PrintWriter output1 = new PrintWriter(outfile1);
    PrintWriter output2 = new PrintWriter(outfile2);

    // Write the userN output
    for (int i=1; i<=943; i++) {
      output1.println(i+"\t"+userN[i]);
    }
    double avg;
    for (int i=1; i<=1682; i++) {
      avg = ((double) itemSum[i])/((double) itemN[i]);
      output2.println(i+"\t"+avg);
    }

    // Close the files
    output1.close();
    output2.close();
  }
}
