cleaned notebook

mkosaka1 · mkosaka1 · commit f384db748ed3 · 2020-12-14T19:18:39.000-05:00
diff --git a/.ipynb_checkpoints/3.EchoVariations_Analysis-checkpoint.ipynb b/.ipynb_checkpoints/3.EchoVariations_Analysis-checkpoint.ipynb
@@ -13,10 +13,6 @@
     "import seaborn as sns\n",
     "import matplotlib.pyplot as plt\n",
     "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
-    "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc, log_loss\n",
     "import plotly.express as px\n",
     "import plotly.graph_objects as go\n",
     "import pickle"
@@ -239,7 +235,7 @@
     }
    ],
    "source": [
-    "# REMOVE FIRE STICK AS IT IS NOT AN ALEXA DEVICE\n",
+    "# REMOVE FIRE STICK AS IT IS NOT AN ECHO DEVICE\n",
     "df=df[df.variation!='Configuration: Fire TV Stick']\n",
     "df['variation'].value_counts()"
    ]
@@ -7348,22 +7344,12 @@
     "## Echo "
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
-    "analyser = SentimentIntensityAnalyzer()\n",
     "def sentimentScore(sentences):\n",
     "    analyzer = SentimentIntensityAnalyzer()\n",
     "    results = []\n",
@@ -7519,7 +7505,7 @@
     }
    ],
    "source": [
-    "# echo plus\n",
+    "# ECHO\n",
     "with open('Saved Models/echo.pkl','rb') as read_file:\n",
     "     echo= pickle.load(read_file)\n",
     "\n",
@@ -10721,7 +10707,7 @@
    "cell_type": "code",
    "execution_count": 59,
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [
     {
diff --git a/.ipynb_checkpoints/4.Top3_Echo_Analysis-checkpoint.ipynb b/.ipynb_checkpoints/4.Top3_Echo_Analysis-checkpoint.ipynb
@@ -8,21 +8,17 @@
    "source": [
     "from wordcloud import WordCloud, STOPWORDS\n",
     "import pandas as pd\n",
-    "import numpy as np\n",
     "import pickle\n",
     "import seaborn as sns\n",
     "import matplotlib.pyplot as plt\n",
     "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
     "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
     "from sklearn.model_selection import train_test_split\n",
     "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc, log_loss\n",
     "import gensim\n",
     "from gensim import corpora\n",
     "from gensim.models import LdaModel, LdaMulticore\n",
-    "from gensim.utils import simple_preprocess, lemmatize\n",
     "from nltk.corpus import stopwords\n",
-    "\n",
     "from gensim.models.word2vec import Word2Vec\n",
     "from multiprocessing import cpu_count\n",
     "import gensim.downloader as api"
@@ -8989,7 +8985,7 @@
    "cell_type": "code",
    "execution_count": 58,
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [
     {
@@ -9079,7 +9075,7 @@
    "cell_type": "code",
    "execution_count": 60,
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [
     {
@@ -18311,20 +18307,6 @@
     "# LDA"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import gensim\n",
-    "from gensim import corpora\n",
-    "from gensim.models import LdaModel, LdaMulticore\n",
-    "import gensim.downloader as api\n",
-    "from gensim.utils import simple_preprocess, lemmatize\n",
-    "from nltk.corpus import stopwords"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 70,
@@ -18397,17 +18379,6 @@
     "# Word2Vec"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from gensim.models.word2vec import Word2Vec\n",
-    "from multiprocessing import cpu_count\n",
-    "import gensim.downloader as api"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 74,
@@ -19890,8 +19861,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.feature_selection import chi2\n",
-    "\n",
     "tfidf_n = TfidfVectorizer(ngram_range=(2, 2))\n",
     "X_tfidf_n = tfidf_n.fit_transform(neg_alexa['new_reviews'])\n",
     "y_n = neg_alexa['rating']\n",
@@ -19990,7 +19959,7 @@
    "cell_type": "code",
    "execution_count": 118,
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [
     {
@@ -20066,9 +20035,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 1,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'pickle' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-1-87645d35ad92>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Saved Models/echoshow.pkl'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mread_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mechoshow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mread_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m: name 'pickle' is not defined"
+     ]
+    }
+   ],
    "source": [
     "with open('Saved Models/echoshow.pkl','rb') as read_file:\n",
     "    echoshow = pickle.load(read_file)"
@@ -24129,20 +24112,6 @@
     "# LDA"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 94,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import gensim\n",
-    "from gensim import corpora\n",
-    "from gensim.models import LdaModel, LdaMulticore\n",
-    "import gensim.downloader as api\n",
-    "from gensim.utils import simple_preprocess, lemmatize\n",
-    "from nltk.corpus import stopwords"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 95,
@@ -25064,7 +25033,7 @@
    "cell_type": "code",
    "execution_count": 107,
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [
     {
diff --git a/3.EchoVariations_Analysis.ipynb b/3.EchoVariations_Analysis.ipynb
@@ -13,10 +13,6 @@
     "import seaborn as sns\n",
     "import matplotlib.pyplot as plt\n",
     "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
-    "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc, log_loss\n",
     "import plotly.express as px\n",
     "import plotly.graph_objects as go\n",
     "import pickle"
@@ -239,7 +235,7 @@
     }
    ],
    "source": [
-    "# REMOVE FIRE STICK AS IT IS NOT AN ALEXA DEVICE\n",
+    "# REMOVE FIRE STICK AS IT IS NOT AN ECHO DEVICE\n",
     "df=df[df.variation!='Configuration: Fire TV Stick']\n",
     "df['variation'].value_counts()"
    ]
@@ -7348,22 +7344,12 @@
     "## Echo "
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
-    "analyser = SentimentIntensityAnalyzer()\n",
     "def sentimentScore(sentences):\n",
     "    analyzer = SentimentIntensityAnalyzer()\n",
     "    results = []\n",
@@ -7519,7 +7505,7 @@
     }
    ],
    "source": [
-    "# echo plus\n",
+    "# ECHO\n",
     "with open('Saved Models/echo.pkl','rb') as read_file:\n",
     "     echo= pickle.load(read_file)\n",
     "\n",
@@ -10721,7 +10707,7 @@
    "cell_type": "code",
    "execution_count": 59,
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [
     {
diff --git a/4.Top3_Echo_Analysis.ipynb b/4.Top3_Echo_Analysis.ipynb