MANASLU8
diff --git a/‎projects/ad-news-classification/lab_1/nlp_1.ipynb
+32-68 b/‎projects/ad-news-classification/lab_1/nlp_1.ipynb
+32-68
@@ -4,9 +4,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "initial_id",
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import nltk\n",
@@ -20,6 +18,8 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "10d57caa4259352a",
+   "metadata": {},
    "outputs": [],
    "source": [
     "import os\n",
@@ -29,27 +29,23 @@
     "df.drop(columns=['Title', 'Description'], axis=1, inplace=True)\n",
     "print(df.head())\n",
     "print(df['text'][1])"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "10d57caa4259352a"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "ad6c6f57643b7841",
+   "metadata": {},
    "outputs": [],
    "source": [
     "df['label']"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "ad6c6f57643b7841"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "f16b78e23c14172f",
+   "metadata": {},
    "outputs": [],
    "source": [
     "import re\n",
@@ -61,29 +57,25 @@
     "    sentences = re.split(\n",
     "        r\"(((?<!\\w\\.\\w.)(?<!\\s\\w\\.)(?<![A-Z][a-z]\\.)(?<=\\.|\\?|\\!)\\s(?=[A-Z]))|((?<![\\,\\-\\:])\\n(?=[A-Z]|\\\" )))\", text)[::4]\n",
     "    return sentences"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "f16b78e23c14172f"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "bded99f420ae9022",
+   "metadata": {},
    "outputs": [],
    "source": [
     "def split_to_words(sentence):\n",
     "    words = re.findall(r\"\\w+@\\w+\\.\\w+|\\+\\d{1,3}-\\d{3}-\\d{3}-\\d{2}-\\d{2}|\\w+\", sentence)\n",
     "    return words"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "bded99f420ae9022"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "e627613a3632834b",
+   "metadata": {},
    "outputs": [],
    "source": [
     "def save_to_file(original, lemmatized, stemmed, id, path):\n",
@@ -93,15 +85,13 @@
     "                print(\"\", file=f)\n",
     "            else:\n",
     "                print(original[i], stemmed[i], lemmatized[i], sep=\"\\t\", file=f)"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "e627613a3632834b"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "b2f4a0464b92971e",
+   "metadata": {},
    "outputs": [],
    "source": [
     "from nltk import WordNetLemmatizer\n",
@@ -130,82 +120,56 @@
     "            stemmed.append(sst.stem(w_processed))\n",
     "            original.append(w_processed)\n",
     "        save_to_file(original, lemmatized, stemmed, os.path.join(str(row['label']), f'{str(counter)}.tsv'), path)"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "b2f4a0464b92971e"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "eae86957a0812dbe",
+   "metadata": {},
    "outputs": [],
-   "source": [],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "eae86957a0812dbe"
+   "source": []
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "9dc8c43fc3a82676",
+   "metadata": {},
    "outputs": [],
    "source": [
     "df = pd.read_csv(os.path.join('../../dataset/raw/train.csv'), names=['label', 'Title', 'Description'])\n",
     "df['text'] = (df['Title'] + '. ' + df['Description'])\n",
     "df.drop(columns=['Title', 'Description'], axis=1, inplace=True)\n",
     "\n",
     "process_file(df, os.path.join('..', 'assets', 'annotated-corpus', 'train'))"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "9dc8c43fc3a82676"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "65ad549a2d70ff6c",
+   "metadata": {},
    "outputs": [],
    "source": [
     "df = pd.read_csv(os.path.join('../../dataset/raw/test.csv'), names=['label', 'Title', 'Description'])\n",
     "df['text'] = (df['Title'] + '. ' + df['Description'])\n",
     "df.drop(columns=['Title', 'Description'], axis=1, inplace=True)\n",
     "\n",
     "process_file(df, os.path.join('..', 'assets', 'annotated-corpus', 'test'))"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "65ad549a2d70ff6c"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "174c4b216214cc80",
+   "metadata": {},
    "outputs": [],
-   "source": [],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "174c4b216214cc80"
+   "source": []
   }
  ],
  "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
   "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
+   "name": "python"
   }
  },
  "nbformat": 4,