From 50ff5a2cc69cd0744cab79f314c72c9c909763d9 Mon Sep 17 00:00:00 2001 From: Vladislav Meshkov Date: Sun, 1 Mar 2026 18:09:22 +0300 Subject: [PATCH 1/2] add task2vec --- .gitignore | 1 + demo/task2vec/simple_example.ipynb | 306 ++++++++++++++ demo/task2vec/wget-log | 15 + setup.py | 21 + src/README.rst | 25 -- src/data_meta_map/datasets.py | 366 +++++++++++++++++ src/data_meta_map/models.py | 94 +++++ src/data_meta_map/task2vec/__init__.py | 8 + src/data_meta_map/task2vec/task2vec.py | 375 ++++++++++++++++++ src/data_meta_map/task2vec/task_similarity.py | 219 ++++++++++ src/data_meta_map/task2vec/utils.py | 65 +++ src/data_meta_map/task2vec/variational.py | 131 ++++++ src/mylib/__init__.py | 1 - src/mylib/train.py | 132 ------ src/requirements.txt | 3 - src/setup.py | 34 -- 16 files changed, 1601 insertions(+), 195 deletions(-) create mode 100644 demo/task2vec/simple_example.ipynb create mode 100644 demo/task2vec/wget-log create mode 100644 setup.py delete mode 100755 src/README.rst create mode 100644 src/data_meta_map/datasets.py create mode 100644 src/data_meta_map/models.py create mode 100644 src/data_meta_map/task2vec/__init__.py create mode 100644 src/data_meta_map/task2vec/task2vec.py create mode 100644 src/data_meta_map/task2vec/task_similarity.py create mode 100644 src/data_meta_map/task2vec/utils.py create mode 100644 src/data_meta_map/task2vec/variational.py delete mode 100755 src/mylib/__init__.py delete mode 100755 src/mylib/train.py delete mode 100755 src/requirements.txt delete mode 100755 src/setup.py diff --git a/.gitignore b/.gitignore index 66717dc..6027723 100644 --- a/.gitignore +++ b/.gitignore @@ -120,3 +120,4 @@ logs/ */mnist *.csv !.dvc +data diff --git a/demo/task2vec/simple_example.ipynb b/demo/task2vec/simple_example.ipynb new file mode 100644 index 0000000..d5b84ea --- /dev/null +++ b/demo/task2vec/simple_example.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "732cfc36-76c6-4b8a-b4fb-1e67c9e48902", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7c7f2bed-6353-4b29-aeed-b08cc9835a1b", + "metadata": {}, + "outputs": [], + "source": [ + "from data_meta_map.task2vec import task2vec\n", + "from data_meta_map.models import get_model\n", + "from data_meta_map import datasets\n", + "from data_meta_map.task2vec import plot_distance_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5107e1b-93e1-4861-becb-16acf1fdd9c6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "69f44bbb-354f-4471-9d16-b812909af972", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files already downloaded and verified\n", + "Files already downloaded and verified\n" + ] + } + ], + "source": [ + "dataset_names = ('mnist', 'cifar10', 'cifar100', 'letters')\n", + "dataset_list = [datasets.__dict__[name](root='../../data')[0] for name in dataset_names] " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1a8c4af8-0fdf-44da-9d6c-1b2d86be9567", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embedding mnist\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Caching features: 0%| | 0/14 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + 
"source": [ + "plot_distance_matrix(embeddings=embeddings, labels=dataset_names)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demo/task2vec/wget-log b/demo/task2vec/wget-log new file mode 100644 index 0000000..4e6c8d6 --- /dev/null +++ b/demo/task2vec/wget-log @@ -0,0 +1,15 @@ +--2026-03-01 18:02:24-- http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz +Распознаётся codh.rois.ac.jp (codh.rois.ac.jp)… 136.187.88.58 +Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... ошибка: Время ожидания соединения истекло. +Продолжение попыток. + +--2026-03-01 18:04:38-- (попытка: 2) http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz +Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... ошибка: Время ожидания соединения истекло. +Продолжение попыток. + +--2026-03-01 18:06:54-- (попытка: 3) http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz +Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... ошибка: Время ожидания соединения истекло. +Продолжение попыток. + +--2026-03-01 18:09:10-- (попытка: 4) http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz +Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..01ea3b0 --- /dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +from setuptools import setup, find_packages + +setup( + name="data_meta_map", + version="0.1.0", + author="...", + description="...", + long_description=open("README.rst").read(), + long_description_content_type="text/x-rst", + url="https://github.com/intsystems/DataMetaMap", + packages=find_packages(where="src"), + package_dir={"": "src"}, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires=">=3.10", + install_requires=[ + ], +) diff --git a/src/README.rst b/src/README.rst deleted file mode 100755 index 8f32660..0000000 --- a/src/README.rst +++ /dev/null @@ -1,25 +0,0 @@ -************ -Installation -************ - -Requirements -============ - -- Python 3.* -- pip 20.0.2 - -Installing by using PyPi -======================== - -Install -------- -.. code-block:: bash - - git clone https://github.com/Intelligent-Systems-Phystech/ProjectTemplate.git /tmp/ProjectTemplate - python3 -m pip install /tmp/ProjectTemplate/src/ - -Uninstall ---------- -.. code-block:: bash - - python3 -m pip uninstall mylib diff --git a/src/data_meta_map/datasets.py b/src/data_meta_map/datasets.py new file mode 100644 index 0000000..fb577d3 --- /dev/null +++ b/src/data_meta_map/datasets.py @@ -0,0 +1,366 @@ +# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + + +import collections +import torchvision.transforms as transforms +import os +import json + +try: + from IPython import embed +except: + pass + +_DATASETS = {} + +Dataset = collections.namedtuple( + 'Dataset', ['trainset', 'testset']) + + +def _add_dataset(dataset_fn): + _DATASETS[dataset_fn.__name__] = dataset_fn + return dataset_fn + + +def _get_transforms(augment=True, normalize=None): + if normalize is None: + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + basic_transform = [transforms.ToTensor(), normalize] + + transform_train = [] + if augment: + transform_train += [ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + ] + else: + transform_train += [ + transforms.Resize(256), + transforms.CenterCrop(224), + ] + transform_train += basic_transform + transform_train = transforms.Compose(transform_train) + + transform_test = [ + transforms.Resize(256), + transforms.CenterCrop(224), + ] + transform_test += basic_transform + transform_test = transforms.Compose(transform_test) + + return transform_train, transform_test + + +def _get_mnist_transforms(augment=True, invert=False, transpose=False): + transform = [ + transforms.ToTensor(), + ] + if invert: + transform += [transforms.Lambda(lambda x: 1. - x)] + if transpose: + transform += [transforms.Lambda(lambda x: x.transpose(2, 1))] + transform += [ + transforms.Normalize((.5,), (.5,)), + transforms.Lambda(lambda x: x.expand(3, 32, 32)) + ] + + transform_train = [] + transform_train += [transforms.Pad(padding=2)] + if augment: + transform_train += [transforms.RandomCrop(32, padding=4)] + transform_train += transform + transform_train = transforms.Compose(transform_train) + + transform_test = [] + transform_test += [transforms.Pad(padding=2)] + transform_test += transform + transform_test = transforms.Compose(transform_test) + + return transform_train, transform_test + + +def _get_cifar_transforms(augment=True): + transform = [ + transforms.ToTensor(), + transforms.Normalize((0.5071, 0.4867, 0.4408), + (0.2675, 0.2565, 0.2761)), + ] + transform_train = [] + if augment: + transform_train += [ + transforms.Pad(padding=4, fill=(125, 123, 113)), + transforms.RandomCrop(32, padding=0), + transforms.RandomHorizontalFlip()] + transform_train += transform + transform_train = transforms.Compose(transform_train) + transform_test = [] + transform_test += transform + transform_test = transforms.Compose(transform_test) + return transform_train, transform_test + + +def set_metadata(trainset, testset, config, dataset_name): + trainset.metadata = { + 'dataset': dataset_name, + 'task_id': config.task_id, + 'task_name': trainset.task_name, + } + testset.metadata = { + 'dataset': dataset_name, + 'task_id': config.task_id, + 'task_name': testset.task_name, + } + return trainset, testset + + +@_add_dataset +def inat2018(root, config): + from dataset.inat import iNat2018Dataset + transform_train, transform_test = _get_transforms() + trainset = iNat2018Dataset( + root, split='train', transform=transform_train, task_id=config.task_id) + testset = iNat2018Dataset( + root, split='val', transform=transform_test, task_id=config.task_id) + trainset, testset = set_metadata(trainset, testset, config, 'inat2018') + return trainset, testset + + +def 
load_tasks_map(tasks_map_file):
+    assert os.path.exists(tasks_map_file), tasks_map_file
+    with open(tasks_map_file, 'r') as f:
+        tasks_map = json.load(f)
+        tasks_map = {int(k): int(v) for k, v in tasks_map.items()}
+    return tasks_map
+
+
+@_add_dataset
+def cub_inat2018(root, config):
+    """This meta-task is the concatenation of CUB-200 (first 25 tasks) and iNat (last 207 tasks).
+
+    - The first 10 tasks are classification of animal species within one of 10 orders of birds in CUB-200
+      (considering all orders except passeriformes).
+    - The next 15 tasks are classification of species within the 15 families of the order passeriformes.
+    - The remaining 207 tasks are classification of species within each of the 207 families in iNat.
+
+    As noted above, the first 10 CUB-200 tasks are classification of species within an order, rather than within a
+    family as done in iNat (recall order > family > species). This is because CUB-200 has very few images in each
+    family of birds (except for the families of passeriformes). Hence, we go up one step in the taxonomy and
+    consider classification within orders rather than families.
+    """
+    NUM_CUB = 25
+    NUM_CUB_ORDERS = 10
+    NUM_INAT = 207
+    assert 0 <= config.task_id < NUM_CUB + NUM_INAT
+    transform_train, transform_test = _get_transforms()
+    if 0 <= config.task_id < NUM_CUB:
+        # CUB
+        from dataset.cub import CUBTasks, CUBDataset
+        tasks_map_file = os.path.join(
+            root, 'cub/CUB_200_2011', 'final_tasks_map.json')
+        tasks_map = load_tasks_map(tasks_map_file)
+        task_id = tasks_map[config.task_id]
+
+        if config.task_id < NUM_CUB_ORDERS:
+            # CUB orders
+            train_tasks = CUBTasks(CUBDataset(root, split='train'))
+            trainset = train_tasks.generate(task_id=task_id,
+                                            use_species_names=True,
+                                            transform=transform_train)
+            test_tasks = CUBTasks(CUBDataset(root, split='test'))
+            testset = test_tasks.generate(task_id=task_id,
+                                          use_species_names=True,
+                                          transform=transform_test)
+        else:
+            # CUB passeriformes families
+            train_tasks = CUBTasks(CUBDataset(root, split='train'))
+            trainset = train_tasks.generate(task_id=task_id,
+                                            task='family',
+                                            taxonomy_file='passeriformes.txt',
+                                            use_species_names=True,
+                                            transform=transform_train)
+            test_tasks = CUBTasks(CUBDataset(root, split='test'))
+            testset = test_tasks.generate(task_id=task_id,
+                                          task='family',
+                                          taxonomy_file='passeriformes.txt',
+                                          use_species_names=True,
+                                          transform=transform_test)
+    else:
+        # iNat2018
+        from dataset.inat import iNat2018Dataset
+        tasks_map_file = os.path.join(root, 'inat2018', 'final_tasks_map.json')
+        tasks_map = load_tasks_map(tasks_map_file)
+        task_id = tasks_map[config.task_id - NUM_CUB]
+
+        trainset = iNat2018Dataset(
+            root, split='train', transform=transform_train, task_id=task_id)
+        testset = iNat2018Dataset(
+            root, split='val', transform=transform_test, task_id=task_id)
+    trainset, testset = set_metadata(trainset, testset, config, 'cub_inat2018')
+    return trainset, testset
+
+
+@_add_dataset
+def imat2018fashion(root, config):
+    NUM_IMAT = 228
+    assert 0 <= config.task_id < NUM_IMAT
+    from dataset.imat import iMat2018FashionDataset, iMat2018FashionTasks
+    transform_train, transform_test = _get_transforms()
+    train_tasks = iMat2018FashionTasks(
+        iMat2018FashionDataset(root, split='train'))
+    trainset = train_tasks.generate(task_id=config.task_id,
+                                    transform=transform_train)
+    test_tasks = iMat2018FashionTasks(
+        iMat2018FashionDataset(root, split='validation'))
+    testset = test_tasks.generate(task_id=config.task_id,
+                                  transform=transform_test)
+    trainset, testset = set_metadata(
+ trainset, testset, config, 'imat2018fashion') + return trainset, testset + + +@_add_dataset +def split_mnist(root, config): + assert isinstance(config.task_id, tuple) + from dataset.mnist import MNISTDataset, SplitMNISTTask + transform_train, transform_test = _get_mnist_transforms() + train_tasks = SplitMNISTTask(MNISTDataset(root, train=True)) + trainset = train_tasks.generate( + classes=config.task_id, transform=transform_train) + test_tasks = SplitMNISTTask(MNISTDataset(root, train=False)) + testset = test_tasks.generate( + classes=config.task_id, transform=transform_test) + trainset, testset = set_metadata(trainset, testset, config, 'split_mnist') + return trainset, testset + + +@_add_dataset +def split_cifar(root, config): + assert 0 <= config.task_id < 11 + from dataset.cifar import CIFAR10Dataset, CIFAR100Dataset, SplitCIFARTask + transform_train, transform_test = _get_cifar_transforms() + train_tasks = SplitCIFARTask(CIFAR10Dataset( + root, train=True), CIFAR100Dataset(root, train=True)) + trainset = train_tasks.generate( + task_id=config.task_id, transform=transform_train) + test_tasks = SplitCIFARTask(CIFAR10Dataset( + root, train=False), CIFAR100Dataset(root, train=False)) + testset = test_tasks.generate( + task_id=config.task_id, transform=transform_test) + trainset, testset = set_metadata(trainset, testset, config, 'split_cifar') + return trainset, testset + + +@_add_dataset +def cifar10_mnist(root, config): + from dataset.cifar import CIFAR10Dataset + from dataset.mnist import MNISTDataset + from dataset.expansion import UnionClassificationTaskExpander + transform_train, transform_test = _get_cifar_transforms() + trainset = UnionClassificationTaskExpander(merge_duplicate_images=False)( + [CIFAR10Dataset(root, train=True), MNISTDataset(root, train=True, expand=True)], transform=transform_train) + testset = UnionClassificationTaskExpander(merge_duplicate_images=False)( + [CIFAR10Dataset(root, train=False), MNISTDataset(root, train=False, expand=True)], transform=transform_test) + return trainset, testset + + +@_add_dataset +def cifar10(root): + from torchvision.datasets import CIFAR10 + transform = transforms.Compose([ + transforms.Resize(224), + transforms.ToTensor(), + transforms.Normalize((0.5071, 0.4867, 0.4408), + (0.2675, 0.2565, 0.2761)), + ]) + trainset = CIFAR10(root, train=True, transform=transform, download=True) + testset = CIFAR10(root, train=False, transform=transform) + return trainset, testset + + +@_add_dataset +def cifar100(root): + from torchvision.datasets import CIFAR100 + transform = transforms.Compose([ + transforms.Resize(224), + transforms.ToTensor(), + transforms.Normalize((0.5071, 0.4867, 0.4408), + (0.2675, 0.2565, 0.2761)), + ]) + trainset = CIFAR100(root, train=True, transform=transform, download=True) + testset = CIFAR100(root, train=False, transform=transform) + return trainset, testset + + +@_add_dataset +def mnist(root): + from torchvision.datasets import MNIST + transform = transforms.Compose([ + lambda x: x.convert("RGB"), + transforms.Resize(224), + transforms.ToTensor(), + # transforms.Normalize((0.5, 0.5, 0.5), (1., 1., 1.)), + ]) + trainset = MNIST(root, train=True, transform=transform, download=True) + testset = MNIST(root, train=False, transform=transform) + return trainset, testset + + +@_add_dataset +def letters(root): + from torchvision.datasets import EMNIST + transform = transforms.Compose([ + lambda x: x.convert("RGB"), + transforms.Resize(224), + transforms.ToTensor(), + # transforms.Normalize((0.5, 0.5, 0.5), (1., 1., 1.)), + 
]) + trainset = EMNIST(root, train=True, split='letters', + transform=transform, download=True) + testset = EMNIST(root, train=False, split='letters', transform=transform) + return trainset, testset + + +@_add_dataset +def kmnist(root): + from torchvision.datasets import KMNIST + transform = transforms.Compose([ + lambda x: x.convert("RGB"), + transforms.Resize(224), + transforms.ToTensor(), + ]) + trainset = KMNIST(root, train=True, transform=transform, download=True) + testset = KMNIST(root, train=False, transform=transform) + return trainset, testset + + +@_add_dataset +def stl10(root): + from torchvision.datasets import STL10 + transform = transforms.Compose([ + transforms.Resize(224), + transforms.ToTensor(), + transforms.Normalize((0.5071, 0.4867, 0.4408), + (0.2675, 0.2565, 0.2761)), + ]) + trainset = STL10(root, split='train', transform=transform, download=True) + testset = STL10(root, split='test', transform=transform) + trainset.targets = trainset.labels + testset.targets = testset.labels + return trainset, testset + + +def get_dataset(root, config=None): + return _DATASETS[config.name](os.path.expanduser(root), config) diff --git a/src/data_meta_map/models.py b/src/data_meta_map/models.py new file mode 100644 index 0000000..32ebcc4 --- /dev/null +++ b/src/data_meta_map/models.py @@ -0,0 +1,94 @@ +# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + + +import torch.utils.model_zoo as model_zoo + +import torchvision.models.resnet as resnet +import torch + +from data_meta_map.task2vec import ProbeNetwork + +_MODELS = {} + + +def _add_model(model_fn): + _MODELS[model_fn.__name__] = model_fn + return model_fn + + +class ResNet(resnet.ResNet, ProbeNetwork): + + def __init__(self, block, layers, num_classes=1000): + super(ResNet, self).__init__(block, layers, num_classes) + # Saves the ordered list of layers. We need this to forward from an arbitrary intermediate layer. + self.layers = [ + self.conv1, self.bn1, self.relu, + self.maxpool, self.layer1, self.layer2, + self.layer3, self.layer4, self.avgpool, + lambda z: torch.flatten(z, 1), self.fc + ] + + @property + def classifier(self): + return self.fc + + # @ProbeNetwork.classifier.setter + # def classifier(self, val): + # self.fc = val + + # Modified forward method that allows to start feeding the cached activations from an intermediate + # layer of the network + def forward(self, x, start_from=0): + """Replaces the default forward so that we can forward features starting from any intermediate layer.""" + for layer in self.layers[start_from:]: + x = layer(x) + return x + + +@_add_model +def resnet18(pretrained=False, num_classes=1000): + """Constructs a ResNet-18 model. 
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model: ProbeNetwork = ResNet(
+        resnet.BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
+    if pretrained:
+        state_dict = model_zoo.load_url(
+            'https://download.pytorch.org/models/resnet18-5c106cde.pth')
+        state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
+        model.load_state_dict(state_dict, strict=False)
+    return model
+
+
+@_add_model
+def resnet34(pretrained=False, num_classes=1000):
+    """Constructs a ResNet-34 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(resnet.BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
+    if pretrained:
+        state_dict = model_zoo.load_url(
+            'https://download.pytorch.org/models/resnet34-333f7ec4.pth')
+        state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
+        model.load_state_dict(state_dict, strict=False)
+    return model
+
+
+def get_model(model_name, pretrained=False, num_classes=1000):
+    try:
+        return _MODELS[model_name](pretrained=pretrained, num_classes=num_classes)
+    except KeyError:
+        raise ValueError(f"Architecture {model_name} not implemented.")
diff --git a/src/data_meta_map/task2vec/__init__.py b/src/data_meta_map/task2vec/__init__.py
new file mode 100644
index 0000000..5098145
--- /dev/null
+++ b/src/data_meta_map/task2vec/__init__.py
@@ -0,0 +1,8 @@
+# src/data_meta_map/task2vec/__init__.py
+from .task2vec import task2vec, Task2Vec, ProbeNetwork
+from .task_similarity import plot_distance_matrix
+
+__all__ = [
+    'task2vec',
+    'plot_distance_matrix',
+]
diff --git a/src/data_meta_map/task2vec/task2vec.py b/src/data_meta_map/task2vec/task2vec.py
new file mode 100644
index 0000000..40923ca
--- /dev/null
+++ b/src/data_meta_map/task2vec/task2vec.py
@@ -0,0 +1,375 @@
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+import itertools
+import math
+from abc import ABC, abstractmethod
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from tqdm.auto import tqdm
+import logging
+from data_meta_map.task2vec import variational
+from torch.utils.data import DataLoader, Dataset
+from torch.optim.optimizer import Optimizer
+from data_meta_map.task2vec.utils import AverageMeter, get_error, get_device
+
+
+class Embedding:
+    def __init__(self, hessian, scale, meta=None):
+        self.hessian = np.array(hessian)
+        self.scale = np.array(scale)
+        self.meta = meta
+
+
+class ProbeNetwork(ABC, nn.Module):
+    """Abstract class that all probe networks should inherit from.
+
+    This is a standard torch.nn.Module but needs to expose a classifier property that returns the final classification
+    module (e.g., the last fully connected layer).
+ """ + + @property + @abstractmethod + def classifier(self): + raise NotImplementedError("Override the classifier property to return the submodules of the network that" + " should be interpreted as the classifier") + + @classifier.setter + @abstractmethod + def classifier(self, val): + raise NotImplementedError("Override the classifier setter to set the submodules of the network that" + " should be interpreted as the classifier") + + +def task2vec(probe_network, dataset: Dataset, skip_layers=0, max_samples=None, classifier_opts=None, + method='montecarlo', method_opts=None, loader_opts=None, bernoulli=False, create_final_embedding: bool = False): + task2vec_embedder = Task2Vec(probe_network, skip_layers=skip_layers, max_samples=max_samples, classifier_opts=classifier_opts, + method=method, method_opts=method_opts, loader_opts=loader_opts, bernoulli=bernoulli) + embed = task2vec_embedder.embed( + dataset, create_final_embedding=create_final_embedding) + return embed + + +class Task2Vec: + + def __init__(self, model: ProbeNetwork, skip_layers=0, max_samples=None, classifier_opts=None, + method='montecarlo', method_opts=None, loader_opts=None, bernoulli=False): + if classifier_opts is None: + classifier_opts = {} + if method_opts is None: + method_opts = {} + if loader_opts is None: + loader_opts = {} + assert method in ('variational', 'montecarlo') + assert skip_layers >= 0 + + self.model = model + # Fix batch norm running statistics (i.e., put batch_norm layers in eval mode) + self.model.train() + self.device = get_device(self.model) + self.skip_layers = skip_layers + self.max_samples = max_samples + self.classifier_opts = classifier_opts + self.method = method + self.method_opts = method_opts + self.loader_opts = loader_opts + self.bernoulli = bernoulli + self.loss_fn = nn.CrossEntropyLoss() if not self.bernoulli else nn.BCEWithLogitsLoss() + self.loss_fn = self.loss_fn.to(self.device) + + def embed(self, dataset: Dataset, create_final_embedding: bool = False): + # Cache the last layer features (needed to train the classifier) and (if needed) the intermediate layer features + # so that we can skip the initial layers when computing the embedding + if self.skip_layers > 0: + self._cache_features(dataset, indexes=(self.skip_layers, -1), loader_opts=self.loader_opts, + max_samples=self.max_samples) + else: + self._cache_features(dataset, max_samples=self.max_samples) + # Fits the last layer classifier using cached features + self._fit_classifier(**self.classifier_opts) + + if self.skip_layers > 0: + dataset = torch.utils.data.TensorDataset(self.model.layers[self.skip_layers].input_features, + self.model.layers[-1].targets) + self.compute_fisher(dataset) + embedding = self.extract_embedding(self.model) + if create_final_embedding: + return embedding.hessian/embedding.scale + return embedding + + def montecarlo_fisher(self, dataset: Dataset, epochs: int = 1): + logging.info("Using montecarlo Fisher") + if self.skip_layers > 0: + dataset = torch.utils.data.TensorDataset(self.model.layers[self.skip_layers].input_features, + self.model.layers[-1].targets) + data_loader = _get_loader(dataset, **self.loader_opts) + device = get_device(self.model) + logging.info("Computing Fisher...") + + for p in self.model.parameters(): + p.grad2_acc = torch.zeros_like(p.data) + p.grad_counter = 0 + for k in range(epochs): + logging.info(f"\tepoch {k + 1}/{epochs}") + for i, (data, target) in enumerate(tqdm(data_loader, leave=False, desc="Computing Fisher")): + data = data.to(device) + output = self.model(data, 
start_from=self.skip_layers)
+                # The gradients used to compute the FIM need to be taken for y sampled from
+                # the model distribution y ~ p_w(y|x), not for y from the dataset
+                if self.bernoulli:
+                    target = torch.bernoulli(torch.sigmoid(output)).detach()
+                else:
+                    target = torch.multinomial(
+                        F.softmax(output, dim=-1), 1).detach().view(-1)
+                loss = self.loss_fn(output, target)
+                self.model.zero_grad()
+                loss.backward()
+                for p in self.model.parameters():
+                    if p.grad is not None:
+                        p.grad2_acc += p.grad.data ** 2
+                        p.grad_counter += 1
+        for p in self.model.parameters():
+            if p.grad_counter == 0:
+                del p.grad2_acc
+            else:
+                p.grad2_acc /= p.grad_counter
+        logging.info("done")
+
+    def _run_epoch(self, data_loader: DataLoader, model: ProbeNetwork, loss_fn,
+                   optimizer: Optimizer, epoch: int, train: bool = True,
+                   add_compression_loss: bool = False, skip_layers=0, beta=1.0e-7):
+        metrics = AverageMeter()
+        device = get_device(model)
+
+        for i, (input, target) in enumerate(tqdm(data_loader, leave=False, desc="Computing Fisher")):
+            input = input.to(device)
+            target = target.to(device)
+            output = model(input, start_from=skip_layers)
+
+            loss = loss_fn(output, target)
+            lz = beta * variational.get_compression_loss(
+                model) if add_compression_loss else torch.zeros_like(loss)
+            loss += lz
+
+            error = get_error(output, target)
+
+            metrics.update(n=input.size(0), loss=loss.item(),
+                           lz=lz.item(), error=error)
+            if train:
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+        logging.info(
+            "{}: [{epoch}] ".format('Epoch' if train else '', epoch=epoch) +
+            "Loss {:.3f} Lz: {:.3f} ".format(metrics.avg["loss"], metrics.avg["lz"]) +
+            "Error: {:.2f}".format(metrics.avg["error"])
+        )
+        return metrics.avg
+
+    def variational_fisher(self, dataset: Dataset, epochs=1, beta=1e-7):
+        logging.info("Training variational fisher...")
+        parameters = []
+        for layer in self.model.layers[self.skip_layers:-1]:
+            if isinstance(layer, nn.Module):  # Skip lambda functions
+                variational.make_variational(layer)
+                parameters += variational.get_variational_vars(layer)
+        bn_params = []
+        # Allows batchnorm parameters to change
+        for m in self.model.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                bn_params += list(m.parameters())
+        # Avoids computing the gradients wrt the weights to save time and memory
+        for p in self.model.parameters():
+            if p not in set(parameters) and p not in set(self.model.classifier.parameters()):
+                p.old_requires_grad = p.requires_grad
+                p.requires_grad = False
+
+        optimizer = torch.optim.Adam([
+            {'params': parameters},
+            {'params': bn_params, 'lr': 5e-4},
+            {'params': self.model.classifier.parameters(), 'lr': 5e-4}],
+            lr=1e-2, betas=(.9, 0.999))
+        if self.skip_layers > 0:
+            dataset = torch.utils.data.TensorDataset(self.model.layers[self.skip_layers].input_features,
+                                                     self.model.layers[-1].targets)
+        train_loader = _get_loader(dataset, **self.loader_opts)
+
+        for epoch in range(epochs):
+            self._run_epoch(train_loader, self.model, self.loss_fn, optimizer, epoch, beta=beta,
+                            add_compression_loss=True, train=True)
+
+        # Resets original value of requires_grad
+        for p in self.model.parameters():
+            if hasattr(p, 'old_requires_grad'):
+                p.requires_grad = p.old_requires_grad
+                del p.old_requires_grad
+
+    def compute_fisher(self, dataset: Dataset):
+        """
+        Computes the Fisher Information of the weights of the model wrt the model output on the dataset and stores it.
+
+        The Fisher Information Matrix is defined as:
+        F = E_{x ~ dataset} E_{y ~ p_w(y|x)} [\nabla_w log p_w(y|x) \nabla_w log p_w(y|x)^t]
+        where p_w(y|x) is the output probability vector of the network and w are the weights of the network.
+        Notice that the label y is sampled from the model output distribution and not from the dataset.
+
+        This code only approximates the diagonal of F. The result is stored in the model layers and can be extracted
+        using the `extract_embedding` method. Different approximation methods of the Fisher information matrix are
+        available, and can be selected in the __init__.
+
+        :param dataset: dataset with the task to compute the Fisher on
+        """
+        if self.method == 'variational':
+            fisher_fn = self.variational_fisher
+        elif self.method == 'montecarlo':
+            fisher_fn = self.montecarlo_fisher
+        else:
+            raise ValueError(f"Invalid Fisher method {self.method}")
+        fisher_fn(dataset, **self.method_opts)
+
+    def _cache_features(self, dataset: Dataset, indexes=(-1,), max_samples=None, loader_opts: dict = None):
+        logging.info("Caching features...")
+        if loader_opts is None:
+            loader_opts = {}
+        data_loader = DataLoader(dataset, shuffle=False, batch_size=loader_opts.get('batch_size', 64),
+                                 num_workers=loader_opts.get('num_workers', 6), drop_last=False)
+
+        device = next(self.model.parameters()).device
+
+        def _hook(layer, inputs):
+            if not hasattr(layer, 'input_features'):
+                layer.input_features = []
+            layer.input_features.append(inputs[0].data.cpu().clone())
+
+        hooks = [self.model.layers[index].register_forward_pre_hook(_hook)
+                 for index in indexes]
+        if max_samples is not None:
+            n_batches = min(
+                math.floor(max_samples / data_loader.batch_size) - 1, len(data_loader))
+        else:
+            n_batches = len(data_loader)
+        targets = []
+
+        for i, (input, target) in tqdm(enumerate(itertools.islice(data_loader, 0, n_batches)), total=n_batches,
+                                       leave=False,
+                                       desc="Caching features"):
+            targets.append(target.clone())
+            self.model(input.to(device))
+        for hook in hooks:
+            hook.remove()
+        for index in indexes:
+            self.model.layers[index].input_features = torch.cat(
+                self.model.layers[index].input_features)
+        self.model.layers[-1].targets = torch.cat(targets)
+
+    def _fit_classifier(self, optimizer='adam', learning_rate=0.0004, weight_decay=0.0001,
+                        epochs=10):
+        """Fits the last layer of the network using the cached features."""
+        logging.info("Fitting final classifier...")
+        if not hasattr(self.model.classifier, 'input_features'):
+            raise ValueError(
+                "You need to run `_cache_features` on the model before running `_fit_classifier`")
+        targets = self.model.classifier.targets.to(self.device)
+        features = self.model.classifier.input_features.to(self.device)
+
+        dataset = torch.utils.data.TensorDataset(features, targets)
+        data_loader = _get_loader(dataset, **self.loader_opts)
+
+        if optimizer == 'adam':
+            optimizer = torch.optim.Adam(
+                self.model.classifier.parameters(), lr=learning_rate, weight_decay=weight_decay)
+        elif optimizer == 'sgd':
+            optimizer = torch.optim.SGD(
+                self.model.classifier.parameters(), lr=learning_rate, weight_decay=weight_decay)
+        else:
+            raise ValueError(f'Unsupported optimizer {optimizer}')
+
+        loss_fn = nn.CrossEntropyLoss()
+        for epoch in tqdm(range(epochs), desc="Fitting classifier", leave=False):
+            metrics = AverageMeter()
+            for data, target in data_loader:
+                optimizer.zero_grad()
+                output = self.model.classifier(data)
+                loss = loss_fn(output, target)
+                error = get_error(output, target)
+                loss.backward()
+                optimizer.step()
+                metrics.update(n=data.size(0), loss=loss.item(), error=error)
+            logging.info(
+                f"[epoch {epoch}]: " + "\t".join(f"{k}: {v}" for k, v in metrics.avg.items()))
+
+    def extract_embedding(self, model: ProbeNetwork):
+        """
+        Reads the values stored by `compute_fisher` and returns them in a common format that describes the diagonal
+        of the Fisher Information Matrix for each layer.
+
+        :param model: the probe network on which `compute_fisher` was run
+        :return: an `Embedding` holding the per-filter approximate Fisher diagonal (`hessian`) and its `scale`
+        """
+        hess, scale = [], []
+        for name, module in model.named_modules():
+            if module is model.classifier:
+                continue
+            # The variational Fisher approximation estimates the variance of noise that can be added to the weights
+            # without increasing the error more than a threshold. The inverse of this is proportional to an
+            # approximation of the hessian in the local minimum.
+            if hasattr(module, 'logvar0') and hasattr(module, 'loglambda2'):
+                logvar = module.logvar0.view(-1).detach().cpu().numpy()
+                hess.append(np.exp(-logvar))
+                loglambda2 = module.loglambda2.detach().cpu().numpy()
+                scale.append(np.exp(-loglambda2).repeat(logvar.size))
+            # The other Fisher approximation methods directly approximate the hessian at the minimum
+            elif hasattr(module, 'weight') and hasattr(module.weight, 'grad2_acc'):
+                grad2 = module.weight.grad2_acc.cpu().detach().numpy()
+                filterwise_hess = grad2.reshape(
+                    grad2.shape[0], -1).mean(axis=1)
+                hess.append(filterwise_hess)
+                scale.append(np.ones_like(filterwise_hess))
+        return Embedding(hessian=np.concatenate(hess), scale=np.concatenate(scale), meta=None)
+
+
+def _get_loader(trainset, testset=None, batch_size=64, num_workers=6, num_samples=10000, drop_last=True):
+    if getattr(trainset, 'is_multi_label', False):
+        raise ValueError("Multi-label datasets not supported")
+    # TODO: Find a way to standardize this
+    if hasattr(trainset, 'labels'):
+        labels = trainset.labels
+    elif hasattr(trainset, 'targets'):
+        labels = trainset.targets
+    else:
+        labels = list(trainset.tensors[1].cpu().numpy())
+    num_classes = int(getattr(trainset, 'num_classes', max(labels) + 1))
+    class_count = np.eye(num_classes)[labels].sum(axis=0)
+    weights = 1. / class_count[labels] / num_classes
+    weights /= weights.sum()
+
+    sampler = torch.utils.data.sampler.WeightedRandomSampler(
+        weights, num_samples=num_samples)
+    # No need for multi-threaded loading if everything is already in memory,
+    # and it would raise an error if the TensorDataset is on CUDA
+    num_workers = num_workers if not isinstance(
+        trainset, torch.utils.data.TensorDataset) else 0
+    trainloader = torch.utils.data.DataLoader(trainset, sampler=sampler, batch_size=batch_size,
+                                              num_workers=num_workers, drop_last=drop_last)
+
+    if testset is None:
+        return trainloader
+    else:
+        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, pin_memory=True, shuffle=False,
+                                                 num_workers=num_workers)
+        return trainloader, testloader
diff --git a/src/data_meta_map/task2vec/task_similarity.py b/src/data_meta_map/task2vec/task_similarity.py
new file mode 100644
index 0000000..5aa302f
--- /dev/null
+++ b/src/data_meta_map/task2vec/task_similarity.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python3
+
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. 
This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + +import itertools +import scipy.spatial.distance as distance +import numpy as np +import copy +import pickle + +_DISTANCES = {} + + +# TODO: Remove methods that do not perform well + +def _register_distance(distance_fn): + _DISTANCES[distance_fn.__name__] = distance_fn + return distance_fn + + +def is_excluded(k): + exclude = ['fc', 'linear'] + return any([e in k for e in exclude]) + + +def load_embedding(filename): + with open(filename, 'rb') as f: + e = pickle.load(f) + return e + + +def get_trivial_embedding_from(e): + trivial_embedding = copy.deepcopy(e) + for l in trivial_embedding['layers']: + a = np.array(l['filter_logvar']) + a[:] = l['filter_lambda2'] + l['filter_logvar'] = list(a) + return trivial_embedding + + +def binary_entropy(p): + from scipy.special import xlogy + return - (xlogy(p, p) + xlogy(1. - p, 1. - p)) + + +def get_layerwise_variance(e, normalized=False): + var = [np.exp(l['filter_logvar']) for l in e['layers']] + if normalized: + var = [v / np.linalg.norm(v) for v in var] + return var + + +def get_variance(e, normalized=False): + var = 1. / np.array(e.hessian) + if normalized: + lambda2 = 1. / np.array(e.scale) + var = var / lambda2 + return var + + +def get_variances(*embeddings, normalized=False): + return [get_variance(e, normalized=normalized) for e in embeddings] + + +def get_hessian(e, normalized=False): + hess = np.array(e.hessian) + if normalized: + scale = np.array(e.scale) + hess = hess / scale + return hess + + +def get_hessians(*embeddings, normalized=False): + return [get_hessian(e, normalized=normalized) for e in embeddings] + + +def get_scaled_hessian(e0, e1): + h0, h1 = get_hessians(e0, e1, normalized=False) + return h0 / (h0 + h1 + 1e-8), h1 / (h0 + h1 + 1e-8) + + +def get_full_kl(e0, e1): + var0, var1 = get_variance(e0), get_variance(e1) + kl0 = .5 * (var0 / var1 - 1 + np.log(var1) - np.log(var0)) + kl1 = .5 * (var1 / var0 - 1 + np.log(var0) - np.log(var1)) + return kl0, kl1 + + +def layerwise_kl(e0, e1): + layers0, layers1 = get_layerwise_variance(e0), get_layerwise_variance(e1) + kl0 = [] + for var0, var1 in zip(layers0, layers1): + kl0.append(np.sum(.5 * (var0 / var1 - 1 + np.log(var1) - np.log(var0)))) + return kl0 + + +def layerwise_cosine(e0, e1): + layers0, layers1 = get_layerwise_variance( + e0, normalized=True), get_layerwise_variance(e1, normalized=True) + res = [] + for var0, var1 in zip(layers0, layers1): + res.append(distance.cosine(var0, var1)) + return res + + +@_register_distance +def kl(e0, e1): + var0, var1 = get_variance(e0), get_variance(e1) + kl0 = .5 * (var0 / var1 - 1 + np.log(var1) - np.log(var0)) + kl1 = .5 * (var1 / var0 - 1 + np.log(var0) - np.log(var1)) + return np.maximum(kl0, kl1).sum() + + +@_register_distance +def asymmetric_kl(e0, e1): + var0, var1 = get_variance(e0), get_variance(e1) + kl0 = .5 * (var0 / var1 - 1 + np.log(var1) - np.log(var0)) + kl1 = .5 * (var1 / var0 - 1 + np.log(var0) - np.log(var1)) + return kl0.sum() + + +@_register_distance +def jsd(e0, e1): + var0, var1 = get_variance(e0), get_variance(e1) + var = .5 * (var0 + var1) + kl0 = .5 * (var0 / var - 1 + np.log(var) - np.log(var0)) + kl1 = .5 * (var1 / var - 1 + np.log(var) - np.log(var1)) + return (.5 * (kl0 + kl1)).mean() + + +@_register_distance +def cosine(e0, e1): + h1, h2 = get_scaled_hessian(e0, e1) + return 
distance.cosine(h1, h2) + + +@_register_distance +def normalized_cosine(e0, e1): + h1, h2 = get_variances(e0, e1, normalized=True) + return distance.cosine(h1, h2) + + +@_register_distance +def correlation(e0, e1): + v1, v2 = get_variances(e0, e1, normalized=False) + return distance.correlation(v1, v2) + + +@_register_distance +def entropy(e0, e1): + h1, h2 = get_scaled_hessian(e0, e1) + return np.log(2) - binary_entropy(h1).mean() + + +def get_normalized_embeddings(embeddings, normalization=None): + F = [1. / get_variance(e, normalized=False) + if e is not None else None for e in embeddings] + zero_embedding = np.zeros_like([x for x in F if x is not None][0]) + F = np.array([x if x is not None else zero_embedding for x in F]) + # FIXME: compute variance using only valid embeddings + if normalization is None: + normalization = np.sqrt((F ** 2).mean(axis=0, keepdims=True)) + F /= normalization + return F, normalization + + +def pdist(embeddings, distance='cosine'): + distance_fn = _DISTANCES[distance] + n = len(embeddings) + distance_matrix = np.zeros([n, n]) + if distance != 'asymmetric_kl': + for (i, e1), (j, e2) in itertools.combinations(enumerate(embeddings), 2): + distance_matrix[i, j] = distance_fn(e1, e2) + distance_matrix[j, i] = distance_matrix[i, j] + else: + for (i, e1) in enumerate(embeddings): + for (j, e2) in enumerate(embeddings): + distance_matrix[i, j] = distance_fn(e1, e2) + return distance_matrix + + +def cdist(from_embeddings, to_embeddings, distance='cosine'): + distance_fn = _DISTANCES[distance] + distance_matrix = np.zeros([len(from_embeddings), len(to_embeddings)]) + for (i, e1) in enumerate(from_embeddings): + for (j, e2) in enumerate(to_embeddings): + if e1 is None or e2 is None: + continue + distance_matrix[i, j] = distance_fn(e1, e2) + return distance_matrix + + +def plot_distance_matrix(embeddings, labels=None, distance='cosine'): + import seaborn as sns + from scipy.cluster.hierarchy import linkage + from scipy.spatial.distance import squareform + import pandas as pd + import matplotlib.pyplot as plt + distance_matrix = pdist(embeddings, distance=distance) + cond_distance_matrix = squareform(distance_matrix, checks=False) + linkage_matrix = linkage(cond_distance_matrix, + method='complete', optimal_ordering=True) + if labels is not None: + distance_matrix = pd.DataFrame( + distance_matrix, index=labels, columns=labels) + sns.clustermap(distance_matrix, row_linkage=linkage_matrix, + col_linkage=linkage_matrix, cmap='viridis_r') + plt.show() diff --git a/src/data_meta_map/task2vec/utils.py b/src/data_meta_map/task2vec/utils.py new file mode 100644 index 0000000..49b6754 --- /dev/null +++ b/src/data_meta_map/task2vec/utils.py @@ -0,0 +1,65 @@ +# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+
+from collections import defaultdict
+import torch
+import numpy as np
+
+
+class AverageMeter(object):
+    """Computes and stores the average and current value"""
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self.val = defaultdict(int)
+        self.avg = defaultdict(float)
+        self.sum = defaultdict(int)
+        self.count = defaultdict(int)
+
+    def update(self, n=1, **val):
+        for k in val:
+            self.val[k] = val[k]
+            self.sum[k] += val[k] * n
+            self.count[k] += n
+            self.avg[k] = self.sum[k] / self.count[k]
+
+
+def set_batchnorm_mode(model, train=True):
+    """Sets batch_norm layers to train or eval mode, independently of the mode of the rest of the model."""
+    def _set_batchnorm_mode(module):
+        if isinstance(module, torch.nn.BatchNorm1d) or isinstance(module, torch.nn.BatchNorm2d):
+            if train:
+                module.train()
+            else:
+                module.eval()
+
+    model.apply(_set_batchnorm_mode)
+
+
+def get_error(output, target):
+    pred = output.argmax(dim=1)
+    correct = pred.eq(target).float().sum()
+    return float((1. - correct / output.size(0)) * 100.)
+
+
+def adjust_learning_rate(optimizer, epoch, optimizer_cfg):
+    lr = optimizer_cfg.lr * \
+        (0.1 ** np.less(optimizer_cfg.schedule, epoch).sum())
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+
+def get_device(model: torch.nn.Module):
+    return next(model.parameters()).device
diff --git a/src/data_meta_map/task2vec/variational.py b/src/data_meta_map/task2vec/variational.py
new file mode 100644
index 0000000..3f1be3e
--- /dev/null
+++ b/src/data_meta_map/task2vec/variational.py
@@ -0,0 +1,131 @@
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+import torch
+import torch.nn.functional as F
+
+from torch.nn.parameter import Parameter
+
+import types
+
+
+def get_variational_vars(model):
+    """Returns all variables involved in optimizing the hessian estimation."""
+    result = []
+    if hasattr(model, 'logvar0'):
+        result.append(model.logvar0)
+        result.append(model.loglambda2)
+    for l in model.children():
+        result += get_variational_vars(l)
+    return result
+
+
+def get_compression_loss(model):
+    """Get the model loss function for hessian estimation.
+
+    Compute KL divergence assuming a normal posterior and a diagonal normal prior p(w) ~ N(0, lambda**2 * I)
+    (where lambda is selected independently for each layer and shared by all filters in the same layer).
+    Recall from the paper that the optimal posterior q(w|D) that minimizes the training loss plus the compression loss
+    is approximately given by q(w|D) ~ N(w, F**-1), where F is the Fisher information matrix.
+ """ + modules = [x for x in model.modules() if hasattr(x, 'logvar0')] + k = sum([x.weight.numel() for x in modules]) + + w_norm2 = sum([x.weight.pow(2).sum() / x.loglambda2.exp() + for x in modules]) + logvar = sum([x.logvar.sum() for x in modules]) + trace = sum([x.logvar.exp().sum() / x.loglambda2.exp() for x in modules]) + lambda2_cost = sum([x.loglambda2 * x.weight.numel() for x in modules]) + + # Standard formula for KL divergence of two normal distributions + # https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Kullback%E2%80%93Leibler_divergence + Lz = kl_divergence = w_norm2 + trace + lambda2_cost - logvar - k + return Lz + + +def variational_forward(module, input): + """Modified forward pass that adds noise to the output.""" + + # Recall that module.logvar0 is created by make_variational() + # (specifically, by add_logvar()) + module.logvar = module.logvar0.expand_as(module.weight) + + var = module.logvar.exp() + + if isinstance(module, torch.nn.modules.conv.Conv2d): + output = F.conv2d(input, module.weight, module.bias, module.stride, + module.padding, module.dilation, module.groups) + # From Variational Dropout and the Local reparametrization trick + # (Kingma et al., 2015) + output_var = F.conv2d(input ** 2 + 1e-2, var, None, module.stride, + module.padding, module.dilation, module.groups) + elif isinstance(module, torch.nn.modules.linear.Linear): + output = F.linear(input, module.weight, module.bias) + output_var = F.linear(input ** 2 + 1e-2, var, None) + else: + raise NotImplementedError( + "Module {} not implemented.".format(type(module))) + + eps = torch.empty_like(output).normal_() + # Local reparemetrization trick + return output + torch.sqrt(output_var) * eps + + +def _reset_logvar(module, variance_scaling=0.05): + if hasattr(module, 'logvar0'): + w = module.weight.data + # Initial ballpark estimate for optimal variance is the variance + # of the weights in the kernel + var = w.view(w.size(0), -1).var(dim=1).view(-1, * + ([1] * (w.ndimension() - 1))) # .expand_as(w) + # Further scale down the variance by some factor + module.logvar0.data[:] = (var * variance_scaling + 1e-8).log() + # Initial guess for lambda is the l2 norm of the weights + module.loglambda2.data = (w.pow(2).mean() + 1e-8).log() + + +def _add_logvar(module): + """Adds a parameter (logvar0) to store the noise variance for the weights. + + Also adds a scalar parameter loglambda2 to store the scaling coefficient + for the layer. + + The variance is assumed to be the same for all weights in the same filter. + The common value is stored in logvar0, which is expanded to the same + dimension as the weight matrix in logvar. 
+ """ + if not hasattr(module, 'weight'): + return + if module.weight.data.ndimension() < 2: + return + if not hasattr(module, 'logvar0'): + w = module.weight.data + # w is of shape NUM_OUT x NUM_IN x K_h X K_w + var = w.view(w.size(0), -1).var(dim=1).view(-1, + *([1] * (w.ndimension() - 1))) + # var is of shape NUM_OUT x 1 x 1 x 1 + # (so that it can be expanded to the same size as w by torch.expand_as()) + # The content does not matter since we will reset it later anyway + module.logvar0 = Parameter(var.log()) + # log(lambda**2) is a scalar shared by all weights in the layer + module.loglambda2 = Parameter(w.pow(2).mean().log()) + module.logvar = module.logvar0.expand_as(module.weight) + _reset_logvar(module) + + +def make_variational(model): + """Replaces the forward pass of the model layers to add noise.""" + model.apply(_add_logvar) + for m in model.modules(): + if hasattr(m, 'logvar0'): + m.forward = types.MethodType(variational_forward, m) diff --git a/src/mylib/__init__.py b/src/mylib/__init__.py deleted file mode 100755 index b8023d8..0000000 --- a/src/mylib/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '0.0.1' diff --git a/src/mylib/train.py b/src/mylib/train.py deleted file mode 100755 index 15f6729..0000000 --- a/src/mylib/train.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -''' -The :mod:`mylib.train` contains classes: - -- :class:`mylib.train.Trainer` - -The :mod:`mylib.train` contains functions: - -- :func:`mylib.train.cv_parameters` -''' -from __future__ import print_function - -__docformat__ = 'restructuredtext' - -import numpy -from scipy.special import expit -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.metrics import classification_report - -class SyntheticBernuliDataset(object): - r'''Base class for synthetic dataset.''' - def __init__(self, n=10, m=100, seed=42): - r'''Constructor method - - :param n: the number of feature - :type n: int - :param m: the number of object - :type m: int - :param seed: seed for random state. - :type seed: int - ''' - rs = numpy.random.RandomState(seed) - - self.w = rs.randn(n) # Генерим вектор параметров из нормального распределения - self.X = rs.randn(m, n) # Генерим вектора признаков из нормального распределения - - self.y = rs.binomial(1, expit(self.X@self.w)) # Гипотеза порождения данных - целевая переменная из схемы Бернули - - -class Trainer(object): - r'''Base class for all trainer.''' - def __init__(self, model, X, Y, seed=42): - r'''Constructor method - - :param model: The class with fit and predict methods. - :type model: object - - :param X: The array of shape - `num_elements` :math:`\times` `num_feature`. - :type X: numpy.array - :param Y: The array of shape - `num_elements` :math:`\times` `num_answers`. - :type Y: numpy.array - - :param seed: Seed for random state. - :type seed: int - ''' - self.model = model - self.seed = seed - ( - self.X_train, - self.X_val, - self.Y_train, - self.Y_val - ) = train_test_split(X, Y, random_state=self.seed) - - def train(self): - r''' Train model - ''' - self.model.fit(self.X_train, self.Y_train) - - def eval(self, output_dict=False): - r'''Evaluate model for initial validadtion dataset. - ''' - return classification_report( - self.Y_val, - self.model.predict( - self.X_val), output_dict=output_dict) - - def test(self, X, Y, output_dict=False): - r"""Evaluate model for given dataset. - - :param X: The array of shape - `num_elements` :math:`\times` `num_feature`. 
- :type X: numpy.array - :param Y: The array of shape - `num_elements` :math:`\times` `num_answers`. - :type Y: numpy.array - """ - return classification_report( - Y, self.model.predict(X), output_dict=output_dict) - - -def cv_parameters(X, Y, seed=42, minimal=0.1, maximum=25, count=100): - r'''Function for the experiment with different regularisation parameters - and return accuracy and weidth for LogisticRegression for each parameter. - - :param X: The array of shape - `num_elements` :math:`\times` `num_feature`. - :type X: numpy.array - :param Y: The array of shape - `num_elements` :math:`\times` `num_answers`. - :type Y: numpy.array - - :param seed: Seed for random state. - :type seed: int - :param minimal: Minimum value for the Cs linspace. - :type minimal: int - :param maximum: Maximum value for the Cs linspace. - :type maximum: int - :param count: Number of the Cs points. - :type count: int - ''' - - Cs = numpy.linspace(minimal, maximum, count) - parameters = [] - accuracy = [] - for C in Cs: - trainer = Trainer( - LogisticRegression(penalty='l1', solver='saga', C=1/C), - X, Y, - ) - - trainer.train() - - accuracy.append(trainer.eval(output_dict=True)['accuracy']) - - parameters.extend(trainer.model.coef_) - - return Cs, accuracy, parameters diff --git a/src/requirements.txt b/src/requirements.txt deleted file mode 100755 index 3ff5802..0000000 --- a/src/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy==1.21.5 -scipy==1.4.1 -scikit-learn==1.0.2 \ No newline at end of file diff --git a/src/setup.py b/src/setup.py deleted file mode 100755 index f9c5472..0000000 --- a/src/setup.py +++ /dev/null @@ -1,34 +0,0 @@ -import io -import re -from setuptools import setup, find_packages - -from mylib import __version__ - -def read(file_path): - with io.open(file_path, 'r', encoding='utf-8') as f: - return f.read() - - -readme = read('README.rst') -# вычищаем локальные версии из файла requirements (согласно PEP440) -requirements = '\n'.join( - re.findall(r'^([^\s^+]+).*$', - read('requirements.txt'), - flags=re.MULTILINE)) - - -setup( - # metadata - name='mylib', - version=__version__, - license='MIT', - author='Andrey Grabovoy', - author_email="grabovoy.av@phystech.edu", - description='mylib, python package', - long_description=readme, - url='https://github.com/Intelligent-Systems-Phystech/ProjectTemplate', - - # options - packages=find_packages(), - install_requires=requirements, -) From be7b7099bd59a15886a014b9899dbc93c4fead1b Mon Sep 17 00:00:00 2001 From: Vladislav Meshkov Date: Sun, 1 Mar 2026 18:13:36 +0300 Subject: [PATCH 2/2] rm extra logs --- .gitignore | 1 + demo/task2vec/wget-log | 15 --------------- 2 files changed, 1 insertion(+), 15 deletions(-) delete mode 100644 demo/task2vec/wget-log diff --git a/.gitignore b/.gitignore index 6027723..b520deb 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,4 @@ logs/ *.csv !.dvc data +wget-log diff --git a/demo/task2vec/wget-log b/demo/task2vec/wget-log deleted file mode 100644 index 4e6c8d6..0000000 --- a/demo/task2vec/wget-log +++ /dev/null @@ -1,15 +0,0 @@ ---2026-03-01 18:02:24-- http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz -Распознаётся codh.rois.ac.jp (codh.rois.ac.jp)… 136.187.88.58 -Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... ошибка: Время ожидания соединения истекло. -Продолжение попыток. - ---2026-03-01 18:04:38-- (попытка: 2) http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz -Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... 
ошибка: Время ожидания соединения истекло. -Продолжение попыток. - ---2026-03-01 18:06:54-- (попытка: 3) http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz -Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... ошибка: Время ожидания соединения истекло. -Продолжение попыток. - ---2026-03-01 18:09:10-- (попытка: 4) http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz -Подключение к codh.rois.ac.jp (codh.rois.ac.jp)|136.187.88.58|:80... \ No newline at end of file
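---

A minimal usage sketch of the API these two patches add, condensed from demo/task2vec/simple_example.ipynb. All imported names come from the patches themselves; the probe architecture and the skip_layers/max_samples values below are illustrative choices, not defaults taken from the code.

    from data_meta_map import datasets
    from data_meta_map.models import get_model
    from data_meta_map.task2vec import task2vec, plot_distance_matrix

    # Any dataset registered in datasets._DATASETS via @_add_dataset works here.
    dataset_names = ('mnist', 'cifar10')
    dataset_list = [datasets.__dict__[name](root='./data')[0] for name in dataset_names]

    embeddings = []
    for name, trainset in zip(dataset_names, dataset_list):
        print(f"Embedding {name}")
        # A fresh ImageNet-pretrained probe per task; task2vec() re-fits the head
        # on cached features before computing the Fisher embedding.
        probe = get_model('resnet18', pretrained=True,
                          num_classes=int(max(trainset.targets)) + 1)
        # skip_layers=6 and max_samples=1000 are illustrative, not package defaults.
        embeddings.append(task2vec(probe, trainset, skip_layers=6, max_samples=1000))

    plot_distance_matrix(embeddings=embeddings, labels=dataset_names)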